diff options
238 files changed, 4461 insertions, 1691 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 25dc4a0..75236f1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2681,9 +2681,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Run specified binary instead of /init from the ramdisk, used for early userspace startup. See initrd. - reboot= [BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode - Format: <reboot_mode>[,<reboot_mode2>[,...]] - See arch/*/kernel/reboot.c or arch/*/kernel/process.c + reboot= [KNL] + Format (x86 or x86_64): + [w[arm] | c[old] | h[ard] | s[oft] | g[pio]] \ + [[,]s[mp]#### \ + [[,]b[ios] | a[cpi] | k[bd] | t[riple] | e[fi] | p[ci]] \ + [[,]f[orce] + Where reboot_mode is one of warm (soft) or cold (hard) or gpio, + reboot_type is one of bios, acpi, kbd, triple, efi, or pci, + reboot_force is either force or not specified, + reboot_cpu is s[mp]#### with #### being the processor + to be used for rebooting. relax_domain_level= [KNL, SMP] Set scheduler's default relax_domain_level. diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index dcc75a9..36ecc26 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -510,7 +510,7 @@ Specify "[Dd]efault" to request automatic configuration. Autoconfiguration will select "node" order in following case. (1) if the DMA zone does not exist or (2) if the DMA zone comprises greater than 50% of the available memory or -(3) if any node's DMA zone comprises greater than 60% of its local memory and +(3) if any node's DMA zone comprises greater than 70% of its local memory and the amount of local memory is big enough. Otherwise, "zone" order will be selected. Default order is recommended unless diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt index 8785fb8..4a63953 100644 --- a/Documentation/vm/transhuge.txt +++ b/Documentation/vm/transhuge.txt @@ -120,8 +120,8 @@ By default kernel tries to use huge zero page on read page fault. It's possible to disable huge zero page by writing 0 or enable it back by writing 1: -echo 0 >/sys/kernel/mm/transparent_hugepage/khugepaged/use_zero_page -echo 1 >/sys/kernel/mm/transparent_hugepage/khugepaged/use_zero_page +echo 0 >/sys/kernel/mm/transparent_hugepage/use_zero_page +echo 1 >/sys/kernel/mm/transparent_hugepage/use_zero_page khugepaged will be automatically started when transparent_hugepage/enabled is set to "always" or "madvise, and it'll diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index 3840b6f..fc66d42 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -657,9 +657,10 @@ Protocol: 2.08+ uncompressed data should be determined using the standard magic numbers. The currently supported compression formats are gzip (magic numbers 1F 8B or 1F 9E), bzip2 (magic number 42 5A), LZMA - (magic number 5D 00), and XZ (magic number FD 37). The uncompressed - payload is currently always ELF (magic number 7F 45 4C 46). - + (magic number 5D 00), XZ (magic number FD 37), and LZ4 (magic number + 02 21). The uncompressed payload is currently always ELF (magic + number 7F 45 4C 46). + Field name: payload_length Type: read Offset/size: 0x24c/4 diff --git a/MAINTAINERS b/MAINTAINERS index e03c40e..9623bc5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9268,6 +9268,13 @@ F: Documentation/networking/z8530drv.txt F: drivers/net/hamradio/*scc.c F: drivers/net/hamradio/z8530.h +ZBUD COMPRESSED PAGE ALLOCATOR +M: Seth Jennings <sjenning@linux.vnet.ibm.com> +L: linux-mm@kvack.org +S: Maintained +F: mm/zbud.c +F: include/linux/zbud.h + ZD1211RW WIRELESS DRIVER M: Daniel Drake <dsd@gentoo.org> M: Ulrich Kunitz <kune@deine-taler.de> @@ -9290,6 +9297,12 @@ M: "Maciej W. Rozycki" <macro@linux-mips.org> S: Maintained F: drivers/tty/serial/zs.* +ZSWAP COMPRESSED SWAP CACHING +M: Seth Jennings <sjenning@linux.vnet.ibm.com> +L: linux-mm@kvack.org +S: Maintained +F: mm/zswap.c + THE REST M: Linus Torvalds <torvalds@linux-foundation.org> L: linux-kernel@vger.kernel.org diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 318164c..0fd1f0d 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -207,8 +207,10 @@ out_of_memory: } up_read(&mm->mmap_sem); - if (user_mode(regs)) - do_group_exit(SIGKILL); /* This will never return */ + if (user_mode(regs)) { + pagefault_out_of_memory(); + return; + } goto no_context; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 5ef7af0..0ac9be6 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -41,6 +41,7 @@ config ARM select HAVE_IDE if PCI || ISA || PCMCIA select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZ4 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO select HAVE_KERNEL_XZ diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore index f79a08e..47279aa 100644 --- a/arch/arm/boot/compressed/.gitignore +++ b/arch/arm/boot/compressed/.gitignore @@ -6,6 +6,7 @@ piggy.gzip piggy.lzo piggy.lzma piggy.xzkern +piggy.lz4 vmlinux vmlinux.lds diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 48d0a44..7ac1610 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -91,6 +91,7 @@ suffix_$(CONFIG_KERNEL_GZIP) = gzip suffix_$(CONFIG_KERNEL_LZO) = lzo suffix_$(CONFIG_KERNEL_LZMA) = lzma suffix_$(CONFIG_KERNEL_XZ) = xzkern +suffix_$(CONFIG_KERNEL_LZ4) = lz4 # Borrowed libfdt files for the ATAG compatibility mode @@ -115,7 +116,7 @@ targets := vmlinux vmlinux.lds \ font.o font.c head.o misc.o $(OBJS) # Make sure files are removed during clean -extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern \ +extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \ lib1funcs.S ashldi3.S $(libfdt) $(libfdt_hdrs) \ hyp-stub.S diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index 24b0475c..bd245d3 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -51,6 +51,10 @@ extern char * strstr(const char * s1, const char *s2); #include "../../../../lib/decompress_unxz.c" #endif +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)) { return decompress(input, len, NULL, NULL, output, NULL, error); diff --git a/arch/arm/boot/compressed/piggy.lz4.S b/arch/arm/boot/compressed/piggy.lz4.S new file mode 100644 index 0000000..3d9a575 --- /dev/null +++ b/arch/arm/boot/compressed/piggy.lz4.S @@ -0,0 +1,6 @@ + .section .piggydata,#alloc + .globl input_data +input_data: + .incbin "arch/arm/boot/compressed/piggy.lz4" + .globl input_data_end +input_data_end: diff --git a/arch/arm/include/asm/hardware/iop3xx.h b/arch/arm/include/asm/hardware/iop3xx.h index ed94b1a..423744b 100644 --- a/arch/arm/include/asm/hardware/iop3xx.h +++ b/arch/arm/include/asm/hardware/iop3xx.h @@ -223,11 +223,12 @@ extern int iop3xx_get_init_atu(void); #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <linux/reboot.h> void iop3xx_map_io(void); void iop_init_cp6_handler(void); void iop_init_time(unsigned long tickrate); -void iop3xx_restart(char, const char *); +void iop3xx_restart(enum reboot_mode, const char *); static inline u32 read_tmr0(void) { diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 75bf079..441efc4 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -11,6 +11,7 @@ #include <linux/types.h> #ifndef __ASSEMBLY__ +#include <linux/reboot.h> struct tag; struct meminfo; @@ -43,7 +44,7 @@ struct machine_desc { unsigned char reserve_lp0 :1; /* never has lp0 */ unsigned char reserve_lp1 :1; /* never has lp1 */ unsigned char reserve_lp2 :1; /* never has lp2 */ - char restart_mode; /* default restart mode */ + enum reboot_mode reboot_mode; /* default restart mode */ struct smp_operations *smp; /* SMP operations */ bool (*smp_init)(void); void (*fixup)(struct tag *, char **, @@ -58,7 +59,7 @@ struct machine_desc { #ifdef CONFIG_MULTI_IRQ_HANDLER void (*handle_irq)(struct pt_regs *); #endif - void (*restart)(char, const char *); + void (*restart)(enum reboot_mode, const char *); }; /* diff --git a/arch/arm/include/asm/system_misc.h b/arch/arm/include/asm/system_misc.h index 21a23e3..a3d61ad 100644 --- a/arch/arm/include/asm/system_misc.h +++ b/arch/arm/include/asm/system_misc.h @@ -6,11 +6,12 @@ #include <linux/compiler.h> #include <linux/linkage.h> #include <linux/irqflags.h> +#include <linux/reboot.h> extern void cpu_init(void); void soft_restart(unsigned long); -extern void (*arm_pm_restart)(char str, const char *cmd); +extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); extern void (*arm_pm_idle)(void); #define UDBG_UNDEFINED (1 << 0) diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 7f1efcd..d3ca4f6 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -32,6 +32,7 @@ #include <linux/hw_breakpoint.h> #include <linux/cpuidle.h> #include <linux/leds.h> +#include <linux/reboot.h> #include <asm/cacheflush.h> #include <asm/idmap.h> @@ -113,7 +114,7 @@ void soft_restart(unsigned long addr) BUG(); } -static void null_restart(char mode, const char *cmd) +static void null_restart(enum reboot_mode reboot_mode, const char *cmd) { } @@ -123,7 +124,7 @@ static void null_restart(char mode, const char *cmd) void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -void (*arm_pm_restart)(char str, const char *cmd) = null_restart; +void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd) = null_restart; EXPORT_SYMBOL_GPL(arm_pm_restart); /* @@ -175,16 +176,6 @@ void arch_cpu_idle(void) default_idle(); } -static char reboot_mode = 'h'; - -int __init reboot_setup(char *str) -{ - reboot_mode = str[0]; - return 1; -} - -__setup("reboot=", reboot_setup); - /* * Called by kexec, immediately prior to machine_kexec(). * diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 2bc1514..0dd3b79 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -886,20 +886,12 @@ long arch_ptrace(struct task_struct *child, long request, #ifdef CONFIG_HAVE_HW_BREAKPOINT case PTRACE_GETHBPREGS: - if (ptrace_get_breakpoints(child) < 0) - return -ESRCH; - ret = ptrace_gethbpregs(child, addr, (unsigned long __user *)data); - ptrace_put_breakpoints(child); break; case PTRACE_SETHBPREGS: - if (ptrace_get_breakpoints(child) < 0) - return -ESRCH; - ret = ptrace_sethbpregs(child, addr, (unsigned long __user *)data); - ptrace_put_breakpoints(child); break; #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 9b65327..63af9a7 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -74,7 +74,7 @@ __setup("fpe=", fpe_setup); extern void paging_init(struct machine_desc *desc); extern void sanity_check_meminfo(void); -extern void reboot_setup(char *str); +extern enum reboot_mode reboot_mode; extern void setup_dma_zone(struct machine_desc *desc); unsigned int processor_id; @@ -861,8 +861,8 @@ void __init setup_arch(char **cmdline_p) setup_dma_zone(mdesc); - if (mdesc->restart_mode) - reboot_setup(&mdesc->restart_mode); + if (mdesc->reboot_mode != REBOOT_HARD) + reboot_mode = mdesc->reboot_mode; init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; diff --git a/arch/arm/mach-at91/at91rm9200.c b/arch/arm/mach-at91/at91rm9200.c index 9eb5743..4aad93d 100644 --- a/arch/arm/mach-at91/at91rm9200.c +++ b/arch/arm/mach-at91/at91rm9200.c @@ -11,6 +11,7 @@ */ #include <linux/module.h> +#include <linux/reboot.h> #include <asm/irq.h> #include <asm/mach/arch.h> @@ -304,7 +305,7 @@ static void at91rm9200_idle(void) at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK); } -static void at91rm9200_restart(char mode, const char *cmd) +static void at91rm9200_restart(enum reboot_mode reboot_mode, const char *cmd) { /* * Perform a hardware reset with the use of the Watchdog timer. diff --git a/arch/arm/mach-at91/generic.h b/arch/arm/mach-at91/generic.h index f6de36a..dc6e2f5 100644 --- a/arch/arm/mach-at91/generic.h +++ b/arch/arm/mach-at91/generic.h @@ -10,6 +10,7 @@ #include <linux/clkdev.h> #include <linux/of.h> +#include <linux/reboot.h> /* Map io */ extern void __init at91_map_io(void); @@ -60,8 +61,8 @@ extern void at91sam9_idle(void); /* reset */ extern void at91_ioremap_rstc(u32 base_addr); -extern void at91sam9_alt_restart(char, const char *); -extern void at91sam9g45_restart(char, const char *); +extern void at91sam9_alt_restart(enum reboot_mode, const char *); +extern void at91sam9g45_restart(enum reboot_mode, const char *); /* shutdown */ extern void at91_ioremap_shdwc(u32 base_addr); diff --git a/arch/arm/mach-bcm2835/bcm2835.c b/arch/arm/mach-bcm2835/bcm2835.c index 740fa9e..40686d7 100644 --- a/arch/arm/mach-bcm2835/bcm2835.c +++ b/arch/arm/mach-bcm2835/bcm2835.c @@ -53,7 +53,7 @@ static void bcm2835_setup_restart(void) WARN(!wdt_regs, "failed to remap watchdog regs"); } -static void bcm2835_restart(char mode, const char *cmd) +static void bcm2835_restart(enum reboot_mode mode, const char *cmd) { u32 val; @@ -91,7 +91,7 @@ static void bcm2835_power_off(void) writel_relaxed(val, wdt_regs + PM_RSTS); /* Continue with normal reset mechanism */ - bcm2835_restart(0, ""); + bcm2835_restart(REBOOT_HARD, ""); } static struct map_desc io_map __initdata = { diff --git a/arch/arm/mach-clps711x/common.c b/arch/arm/mach-clps711x/common.c index f6d1746..4ca2f3c 100644 --- a/arch/arm/mach-clps711x/common.c +++ b/arch/arm/mach-clps711x/common.c @@ -384,7 +384,7 @@ void __init clps711x_timer_init(void) setup_irq(IRQ_TC2OI, &clps711x_timer_irq); } -void clps711x_restart(char mode, const char *cmd) +void clps711x_restart(enum reboot_mode mode, const char *cmd) { soft_restart(0); } diff --git a/arch/arm/mach-clps711x/common.h b/arch/arm/mach-clps711x/common.h index 2a22f4c..9a6767b 100644 --- a/arch/arm/mach-clps711x/common.h +++ b/arch/arm/mach-clps711x/common.h @@ -4,6 +4,8 @@ * Common bits. */ +#include <linux/reboot.h> + #define CLPS711X_NR_IRQS (33) #define CLPS711X_NR_GPIO (4 * 8 + 3) #define CLPS711X_GPIO(prt, bit) ((prt) * 8 + (bit)) @@ -12,5 +14,5 @@ extern void clps711x_map_io(void); extern void clps711x_init_irq(void); extern void clps711x_timer_init(void); extern void clps711x_handle_irq(struct pt_regs *regs); -extern void clps711x_restart(char mode, const char *cmd); +extern void clps711x_restart(enum reboot_mode mode, const char *cmd); extern void clps711x_init_early(void); diff --git a/arch/arm/mach-cns3xxx/core.h b/arch/arm/mach-cns3xxx/core.h index b23b17b..5218b61 100644 --- a/arch/arm/mach-cns3xxx/core.h +++ b/arch/arm/mach-cns3xxx/core.h @@ -11,6 +11,8 @@ #ifndef __CNS3XXX_CORE_H #define __CNS3XXX_CORE_H +#include <linux/reboot.h> + extern void cns3xxx_timer_init(void); #ifdef CONFIG_CACHE_L2X0 @@ -22,6 +24,6 @@ static inline void cns3xxx_l2x0_init(void) {} void __init cns3xxx_map_io(void); void __init cns3xxx_init_irq(void); void cns3xxx_power_off(void); -void cns3xxx_restart(char, const char *); +void cns3xxx_restart(enum reboot_mode, const char *); #endif /* __CNS3XXX_CORE_H */ diff --git a/arch/arm/mach-cns3xxx/pm.c b/arch/arm/mach-cns3xxx/pm.c index 79e3d47..fb38c72 100644 --- a/arch/arm/mach-cns3xxx/pm.c +++ b/arch/arm/mach-cns3xxx/pm.c @@ -89,7 +89,7 @@ void cns3xxx_pwr_soft_rst(unsigned int block) } EXPORT_SYMBOL(cns3xxx_pwr_soft_rst); -void cns3xxx_restart(char mode, const char *cmd) +void cns3xxx_restart(enum reboot_mode mode, const char *cmd) { /* * To reset, we hit the on-board reset register diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index eb254fe..71a46a3 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -16,6 +16,7 @@ #include <linux/serial_8250.h> #include <linux/ahci_platform.h> #include <linux/clk.h> +#include <linux/reboot.h> #include <mach/cputype.h> #include <mach/common.h> @@ -366,7 +367,7 @@ static struct platform_device da8xx_wdt_device = { .resource = da8xx_watchdog_resources, }; -void da8xx_restart(char mode, const char *cmd) +void da8xx_restart(enum reboot_mode mode, const char *cmd) { struct device *dev; diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c index 90b83d0..111573c0 100644 --- a/arch/arm/mach-davinci/devices.c +++ b/arch/arm/mach-davinci/devices.c @@ -13,6 +13,7 @@ #include <linux/platform_device.h> #include <linux/dma-mapping.h> #include <linux/io.h> +#include <linux/reboot.h> #include <mach/hardware.h> #include <linux/platform_data/i2c-davinci.h> @@ -307,7 +308,7 @@ struct platform_device davinci_wdt_device = { .resource = wdt_resources, }; -void davinci_restart(char mode, const char *cmd) +void davinci_restart(enum reboot_mode mode, const char *cmd) { davinci_watchdog_reset(&davinci_wdt_device); } diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h index b124b77..cce316b 100644 --- a/arch/arm/mach-davinci/include/mach/common.h +++ b/arch/arm/mach-davinci/include/mach/common.h @@ -14,6 +14,7 @@ #include <linux/compiler.h> #include <linux/types.h> +#include <linux/reboot.h> extern void davinci_timer_init(void); @@ -81,7 +82,7 @@ extern struct davinci_soc_info davinci_soc_info; extern void davinci_common_init(struct davinci_soc_info *soc_info); extern void davinci_init_ide(void); -void davinci_restart(char mode, const char *cmd); +void davinci_restart(enum reboot_mode mode, const char *cmd); void davinci_init_late(void); #ifdef CONFIG_DAVINCI_RESET_CLOCKS diff --git a/arch/arm/mach-davinci/include/mach/da8xx.h b/arch/arm/mach-davinci/include/mach/da8xx.h index 3c797e2..7b41a5e 100644 --- a/arch/arm/mach-davinci/include/mach/da8xx.h +++ b/arch/arm/mach-davinci/include/mach/da8xx.h @@ -17,6 +17,7 @@ #include <linux/davinci_emac.h> #include <linux/spi/spi.h> #include <linux/platform_data/davinci_asp.h> +#include <linux/reboot.h> #include <linux/videodev2.h> #include <mach/serial.h> @@ -106,7 +107,7 @@ int da850_register_vpif_display (struct vpif_display_config *display_config); int da850_register_vpif_capture (struct vpif_capture_config *capture_config); -void da8xx_restart(char mode, const char *cmd); +void da8xx_restart(enum reboot_mode mode, const char *cmd); void da8xx_rproc_reserve_cma(void); int da8xx_register_rproc(void); diff --git a/arch/arm/mach-davinci/include/mach/tnetv107x.h b/arch/arm/mach-davinci/include/mach/tnetv107x.h index 366e975..16314c6 100644 --- a/arch/arm/mach-davinci/include/mach/tnetv107x.h +++ b/arch/arm/mach-davinci/include/mach/tnetv107x.h @@ -35,6 +35,7 @@ #include <linux/serial_8250.h> #include <linux/input/matrix_keypad.h> #include <linux/mfd/ti_ssp.h> +#include <linux/reboot.h> #include <linux/platform_data/mmc-davinci.h> #include <linux/platform_data/mtd-davinci.h> @@ -54,7 +55,7 @@ extern struct platform_device tnetv107x_serial_device; extern void tnetv107x_init(void); extern void tnetv107x_devices_init(struct tnetv107x_device_info *); extern void tnetv107x_irq_init(void); -void tnetv107x_restart(char mode, const char *cmd); +void tnetv107x_restart(enum reboot_mode mode, const char *cmd); #endif diff --git a/arch/arm/mach-davinci/tnetv107x.c b/arch/arm/mach-davinci/tnetv107x.c index 3b2a70d..4545667 100644 --- a/arch/arm/mach-davinci/tnetv107x.c +++ b/arch/arm/mach-davinci/tnetv107x.c @@ -19,6 +19,7 @@ #include <linux/io.h> #include <linux/err.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <asm/mach/map.h> @@ -730,7 +731,7 @@ static void tnetv107x_watchdog_reset(struct platform_device *pdev) __raw_writel(1, ®s->kick); } -void tnetv107x_restart(char mode, const char *cmd) +void tnetv107x_restart(enum reboot_mode mode, const char *cmd) { tnetv107x_watchdog_reset(&tnetv107x_wdt_device); } diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c index 2a9443d..00247c7 100644 --- a/arch/arm/mach-dove/common.c +++ b/arch/arm/mach-dove/common.c @@ -381,7 +381,7 @@ void __init dove_init(void) dove_xor1_init(); } -void dove_restart(char mode, const char *cmd) +void dove_restart(enum reboot_mode mode, const char *cmd) { /* * Enable soft reset to assert RSTOUTn. diff --git a/arch/arm/mach-dove/common.h b/arch/arm/mach-dove/common.h index e863479..1d72522 100644 --- a/arch/arm/mach-dove/common.h +++ b/arch/arm/mach-dove/common.h @@ -11,6 +11,8 @@ #ifndef __ARCH_DOVE_COMMON_H #define __ARCH_DOVE_COMMON_H +#include <linux/reboot.h> + struct mv643xx_eth_platform_data; struct mv_sata_platform_data; @@ -42,6 +44,6 @@ void dove_spi1_init(void); void dove_i2c_init(void); void dove_sdio0_init(void); void dove_sdio1_init(void); -void dove_restart(char, const char *); +void dove_restart(enum reboot_mode, const char *); #endif diff --git a/arch/arm/mach-ebsa110/core.c b/arch/arm/mach-ebsa110/core.c index 8a53f34..68ac934 100644 --- a/arch/arm/mach-ebsa110/core.c +++ b/arch/arm/mach-ebsa110/core.c @@ -311,7 +311,7 @@ static int __init ebsa110_init(void) arch_initcall(ebsa110_init); -static void ebsa110_restart(char mode, const char *cmd) +static void ebsa110_restart(enum reboot_mode mode, const char *cmd) { soft_restart(0x80000000); } @@ -321,7 +321,6 @@ MACHINE_START(EBSA110, "EBSA110") .atag_offset = 0x400, .reserve_lp0 = 1, .reserve_lp2 = 1, - .restart_mode = 's', .map_io = ebsa110_map_io, .init_early = ebsa110_init_early, .init_irq = ebsa110_init_irq, diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index c49ed3d..df8612f 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -35,6 +35,7 @@ #include <linux/spi/spi.h> #include <linux/export.h> #include <linux/irqchip/arm-vic.h> +#include <linux/reboot.h> #include <mach/hardware.h> #include <linux/platform_data/video-ep93xx.h> @@ -921,7 +922,7 @@ void __init ep93xx_init_devices(void) gpio_led_register_device(-1, &ep93xx_led_data); } -void ep93xx_restart(char mode, const char *cmd) +void ep93xx_restart(enum reboot_mode mode, const char *cmd) { /* * Set then clear the SWRST bit to initiate a software reset diff --git a/arch/arm/mach-ep93xx/include/mach/platform.h b/arch/arm/mach-ep93xx/include/mach/platform.h index a14e1b3..e256e0b 100644 --- a/arch/arm/mach-ep93xx/include/mach/platform.h +++ b/arch/arm/mach-ep93xx/include/mach/platform.h @@ -4,6 +4,8 @@ #ifndef __ASSEMBLY__ +#include <linux/reboot.h> + struct i2c_gpio_platform_data; struct i2c_board_info; struct spi_board_info; @@ -55,7 +57,7 @@ void ep93xx_ide_release_gpio(struct platform_device *pdev); void ep93xx_init_devices(void); extern void ep93xx_timer_init(void); -void ep93xx_restart(char, const char *); +void ep93xx_restart(enum reboot_mode, const char *); void ep93xx_init_late(void); #ifdef CONFIG_CRUNCH diff --git a/arch/arm/mach-exynos/common.c b/arch/arm/mach-exynos/common.c index 2c655db..164685b 100644 --- a/arch/arm/mach-exynos/common.c +++ b/arch/arm/mach-exynos/common.c @@ -285,12 +285,12 @@ static struct map_desc exynos5440_iodesc0[] __initdata = { }, }; -void exynos4_restart(char mode, const char *cmd) +void exynos4_restart(enum reboot_mode mode, const char *cmd) { __raw_writel(0x1, S5P_SWRESET); } -void exynos5_restart(char mode, const char *cmd) +void exynos5_restart(enum reboot_mode mode, const char *cmd) { struct device_node *np; u32 val; diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h index 38d45fd..3e156bc 100644 --- a/arch/arm/mach-exynos/common.h +++ b/arch/arm/mach-exynos/common.h @@ -12,6 +12,7 @@ #ifndef __ARCH_ARM_MACH_EXYNOS_COMMON_H #define __ARCH_ARM_MACH_EXYNOS_COMMON_H +#include <linux/reboot.h> #include <linux/of.h> void mct_init(void __iomem *base, int irq_g0, int irq_l0, int irq_l1); @@ -20,8 +21,8 @@ extern unsigned long xxti_f, xusbxti_f; struct map_desc; void exynos_init_io(void); -void exynos4_restart(char mode, const char *cmd); -void exynos5_restart(char mode, const char *cmd); +void exynos4_restart(enum reboot_mode mode, const char *cmd); +void exynos5_restart(enum reboot_mode mode, const char *cmd); void exynos_init_late(void); /* ToDo: remove these after migrating legacy exynos4 platforms to dt */ diff --git a/arch/arm/mach-footbridge/cats-hw.c b/arch/arm/mach-footbridge/cats-hw.c index 6987a09..9669cc0 100644 --- a/arch/arm/mach-footbridge/cats-hw.c +++ b/arch/arm/mach-footbridge/cats-hw.c @@ -86,7 +86,7 @@ fixup_cats(struct tag *tags, char **cmdline, struct meminfo *mi) MACHINE_START(CATS, "Chalice-CATS") /* Maintainer: Philip Blundell */ .atag_offset = 0x100, - .restart_mode = 's', + .reboot_mode = REBOOT_SOFT, .fixup = fixup_cats, .map_io = footbridge_map_io, .init_irq = footbridge_init_irq, diff --git a/arch/arm/mach-footbridge/common.c b/arch/arm/mach-footbridge/common.c index a42b369..2739ca2 100644 --- a/arch/arm/mach-footbridge/common.c +++ b/arch/arm/mach-footbridge/common.c @@ -198,9 +198,9 @@ void __init footbridge_map_io(void) } } -void footbridge_restart(char mode, const char *cmd) +void footbridge_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') { + if (mode == REBOOT_SOFT) { /* Jump into the ROM */ soft_restart(0x41000000); } else { diff --git a/arch/arm/mach-footbridge/common.h b/arch/arm/mach-footbridge/common.h index a846e50..56607b3 100644 --- a/arch/arm/mach-footbridge/common.h +++ b/arch/arm/mach-footbridge/common.h @@ -1,3 +1,4 @@ +#include <linux/reboot.h> extern void footbridge_timer_init(void); extern void isa_timer_init(void); @@ -8,4 +9,4 @@ extern void footbridge_map_io(void); extern void footbridge_init_irq(void); extern void isa_init_irq(unsigned int irq); -extern void footbridge_restart(char, const char *); +extern void footbridge_restart(enum reboot_mode, const char *); diff --git a/arch/arm/mach-footbridge/netwinder-hw.c b/arch/arm/mach-footbridge/netwinder-hw.c index 90ea23f..1fd2cf0 100644 --- a/arch/arm/mach-footbridge/netwinder-hw.c +++ b/arch/arm/mach-footbridge/netwinder-hw.c @@ -634,9 +634,9 @@ fixup_netwinder(struct tag *tags, char **cmdline, struct meminfo *mi) #endif } -static void netwinder_restart(char mode, const char *cmd) +static void netwinder_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') { + if (mode == REBOOT_SOFT) { /* Jump into the ROM */ soft_restart(0x41000000); } else { diff --git a/arch/arm/mach-highbank/core.h b/arch/arm/mach-highbank/core.h index 3f65206..aea1ec5 100644 --- a/arch/arm/mach-highbank/core.h +++ b/arch/arm/mach-highbank/core.h @@ -1,8 +1,10 @@ #ifndef __HIGHBANK_CORE_H #define __HIGHBANK_CORE_H +#include <linux/reboot.h> + extern void highbank_set_cpu_jump(int cpu, void *jump_addr); -extern void highbank_restart(char, const char *); +extern void highbank_restart(enum reboot_mode, const char *); extern void __iomem *scu_base_addr; #ifdef CONFIG_PM_SLEEP diff --git a/arch/arm/mach-highbank/system.c b/arch/arm/mach-highbank/system.c index 37d8384..2df5870 100644 --- a/arch/arm/mach-highbank/system.c +++ b/arch/arm/mach-highbank/system.c @@ -15,13 +15,14 @@ */ #include <linux/io.h> #include <asm/proc-fns.h> +#include <linux/reboot.h> #include "core.h" #include "sysregs.h" -void highbank_restart(char mode, const char *cmd) +void highbank_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 'h') + if (mode == REBOOT_HARD) highbank_set_pwr_hard_reset(); else highbank_set_pwr_soft_reset(); diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index ee78847..cb6c838 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -11,6 +11,8 @@ #ifndef __ASM_ARCH_MXC_COMMON_H__ #define __ASM_ARCH_MXC_COMMON_H__ +#include <linux/reboot.h> + struct platform_device; struct pt_regs; struct clk; @@ -71,7 +73,7 @@ extern int mx53_clocks_init_dt(void); extern struct platform_device *mxc_register_gpio(char *name, int id, resource_size_t iobase, resource_size_t iosize, int irq, int irq_high); extern void mxc_set_cpu_type(unsigned int type); -extern void mxc_restart(char, const char *); +extern void mxc_restart(enum reboot_mode, const char *); extern void mxc_arch_reset_init(void __iomem *); extern void mxc_arch_reset_init_dt(void); extern int mx53_revision(void); diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index f596522..7be13f8 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -27,6 +27,7 @@ #include <linux/of_platform.h> #include <linux/opp.h> #include <linux/phy.h> +#include <linux/reboot.h> #include <linux/regmap.h> #include <linux/micrel_phy.h> #include <linux/mfd/syscon.h> @@ -67,7 +68,7 @@ static void __init imx6q_init_revision(void) mxc_set_cpu_type(rev >> 16 & 0xff); } -static void imx6q_restart(char mode, const char *cmd) +static void imx6q_restart(enum reboot_mode mode, const char *cmd) { struct device_node *np; void __iomem *wdog_base; diff --git a/arch/arm/mach-imx/system.c b/arch/arm/mach-imx/system.c index 7cdc79a..6fe81bb 100644 --- a/arch/arm/mach-imx/system.c +++ b/arch/arm/mach-imx/system.c @@ -37,7 +37,7 @@ static struct clk *wdog_clk; /* * Reset the system. It is called by machine_restart(). */ -void mxc_restart(char mode, const char *cmd) +void mxc_restart(enum reboot_mode mode, const char *cmd) { unsigned int wcr_enable; diff --git a/arch/arm/mach-integrator/common.h b/arch/arm/mach-integrator/common.h index 72516658b..ad0ac554 100644 --- a/arch/arm/mach-integrator/common.h +++ b/arch/arm/mach-integrator/common.h @@ -1,7 +1,8 @@ +#include <linux/reboot.h> #include <linux/amba/serial.h> extern struct amba_pl010_data ap_uart_data; void integrator_init_early(void); int integrator_init(bool is_cp); void integrator_reserve(void); -void integrator_restart(char, const char *); +void integrator_restart(enum reboot_mode, const char *); void integrator_init_sysfs(struct device *parent, u32 id); diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c index 81461d2..4cdfd73 100644 --- a/arch/arm/mach-integrator/core.c +++ b/arch/arm/mach-integrator/core.c @@ -124,7 +124,7 @@ void __init integrator_reserve(void) /* * To reset, we hit the on-board reset register in the system FPGA */ -void integrator_restart(char mode, const char *cmd) +void integrator_restart(enum reboot_mode mode, const char *cmd) { cm_control(CM_CTRL_RESET, CM_CTRL_RESET); } diff --git a/arch/arm/mach-iop13xx/include/mach/iop13xx.h b/arch/arm/mach-iop13xx/include/mach/iop13xx.h index 7480f58..17b4027 100644 --- a/arch/arm/mach-iop13xx/include/mach/iop13xx.h +++ b/arch/arm/mach-iop13xx/include/mach/iop13xx.h @@ -2,6 +2,9 @@ #define _IOP13XX_HW_H_ #ifndef __ASSEMBLY__ + +#include <linux/reboot.h> + /* The ATU offsets can change based on the strapping */ extern u32 iop13xx_atux_pmmr_offset; extern u32 iop13xx_atue_pmmr_offset; @@ -11,7 +14,7 @@ void iop13xx_map_io(void); void iop13xx_platform_init(void); void iop13xx_add_tpmi_devices(void); void iop13xx_init_irq(void); -void iop13xx_restart(char, const char *); +void iop13xx_restart(enum reboot_mode, const char *); /* CPUID CP6 R0 Page 0 */ static inline int iop13xx_cpu_id(void) diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index 1c5bd763..96e6c7a 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -594,7 +594,7 @@ __setup("iop13xx_init_adma", iop13xx_init_adma_setup); __setup("iop13xx_init_uart", iop13xx_init_uart_setup); __setup("iop13xx_init_i2c", iop13xx_init_i2c_setup); -void iop13xx_restart(char mode, const char *cmd) +void iop13xx_restart(enum reboot_mode mode, const char *cmd) { /* * Reset the internal bus (warning both cores are reset) diff --git a/arch/arm/mach-iop32x/n2100.c b/arch/arm/mach-iop32x/n2100.c index ea0984a..0691443 100644 --- a/arch/arm/mach-iop32x/n2100.c +++ b/arch/arm/mach-iop32x/n2100.c @@ -286,7 +286,7 @@ static void n2100_power_off(void) ; } -static void n2100_restart(char mode, const char *cmd) +static void n2100_restart(enum reboot_mode mode, const char *cmd) { gpio_line_set(N2100_HARDWARE_RESET, GPIO_LOW); gpio_line_config(N2100_HARDWARE_RESET, GPIO_OUT); diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 1f6c1fb..5327dec 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -531,9 +531,9 @@ static void __init ixp4xx_clockevent_init(void) 0xf, 0xfffffffe); } -void ixp4xx_restart(char mode, const char *cmd) +void ixp4xx_restart(enum reboot_mode mode, const char *cmd) { - if ( 1 && mode == 's') { + if ( 1 && mode == REBOOT_SOFT) { /* Jump into ROM at address 0 */ soft_restart(0); } else { diff --git a/arch/arm/mach-ixp4xx/dsmg600-setup.c b/arch/arm/mach-ixp4xx/dsmg600-setup.c index 5d413f8..686ef34 100644 --- a/arch/arm/mach-ixp4xx/dsmg600-setup.c +++ b/arch/arm/mach-ixp4xx/dsmg600-setup.c @@ -27,6 +27,7 @@ #include <linux/i2c.h> #include <linux/i2c-gpio.h> +#include <mach/hardware.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> #include <asm/mach/flash.h> diff --git a/arch/arm/mach-ixp4xx/include/mach/platform.h b/arch/arm/mach-ixp4xx/include/mach/platform.h index db5afb6..4c4c6a6 100644 --- a/arch/arm/mach-ixp4xx/include/mach/platform.h +++ b/arch/arm/mach-ixp4xx/include/mach/platform.h @@ -13,6 +13,8 @@ #ifndef __ASSEMBLY__ +#include <linux/reboot.h> + #include <asm/types.h> #ifndef __ARMEB__ @@ -123,7 +125,7 @@ extern void ixp4xx_init_early(void); extern void ixp4xx_init_irq(void); extern void ixp4xx_sys_init(void); extern void ixp4xx_timer_init(void); -extern void ixp4xx_restart(char, const char *); +extern void ixp4xx_restart(enum reboot_mode, const char *); extern void ixp4xx_pci_preinit(void); struct pci_sys_data; extern int ixp4xx_setup(int nr, struct pci_sys_data *sys); diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index 7c72c72..e9238b5 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -20,6 +20,7 @@ #include <linux/mv643xx_i2c.h> #include <linux/timex.h> #include <linux/kexec.h> +#include <linux/reboot.h> #include <net/dsa.h> #include <asm/page.h> #include <asm/mach/map.h> @@ -722,7 +723,7 @@ void __init kirkwood_init(void) #endif } -void kirkwood_restart(char mode, const char *cmd) +void kirkwood_restart(enum reboot_mode mode, const char *cmd) { /* * Enable soft reset to assert RSTOUTn. diff --git a/arch/arm/mach-kirkwood/common.h b/arch/arm/mach-kirkwood/common.h index 1c09f3f..fcf3ba6 100644 --- a/arch/arm/mach-kirkwood/common.h +++ b/arch/arm/mach-kirkwood/common.h @@ -11,6 +11,8 @@ #ifndef __ARCH_KIRKWOOD_COMMON_H #define __ARCH_KIRKWOOD_COMMON_H +#include <linux/reboot.h> + struct dsa_platform_data; struct mv643xx_eth_platform_data; struct mv_sata_platform_data; @@ -53,7 +55,7 @@ void kirkwood_audio_init(void); void kirkwood_cpuidle_init(void); void kirkwood_cpufreq_init(void); -void kirkwood_restart(char, const char *); +void kirkwood_restart(enum reboot_mode, const char *); void kirkwood_clk_init(void); /* board init functions for boards not fully converted to fdt */ diff --git a/arch/arm/mach-ks8695/generic.h b/arch/arm/mach-ks8695/generic.h index 6e97ce4..43253f8 100644 --- a/arch/arm/mach-ks8695/generic.h +++ b/arch/arm/mach-ks8695/generic.h @@ -12,5 +12,5 @@ extern __init void ks8695_map_io(void); extern __init void ks8695_init_irq(void); -extern void ks8695_restart(char, const char *); +extern void ks8695_restart(enum reboot_mode, const char *); extern void ks8695_timer_init(void); diff --git a/arch/arm/mach-ks8695/time.c b/arch/arm/mach-ks8695/time.c index c272a386..426c976 100644 --- a/arch/arm/mach-ks8695/time.c +++ b/arch/arm/mach-ks8695/time.c @@ -154,11 +154,11 @@ void __init ks8695_timer_init(void) setup_irq(KS8695_IRQ_TIMER1, &ks8695_timer_irq); } -void ks8695_restart(char mode, const char *cmd) +void ks8695_restart(enum reboot_mode reboot_mode, const char *cmd) { unsigned int reg; - if (mode == 's') + if (reboot_mode == REBOOT_SOFT) soft_restart(0); /* disable timer0 */ diff --git a/arch/arm/mach-lpc32xx/common.c b/arch/arm/mach-lpc32xx/common.c index 0d4db8c..d7aa54c 100644 --- a/arch/arm/mach-lpc32xx/common.c +++ b/arch/arm/mach-lpc32xx/common.c @@ -207,11 +207,11 @@ void __init lpc32xx_map_io(void) iotable_init(lpc32xx_io_desc, ARRAY_SIZE(lpc32xx_io_desc)); } -void lpc23xx_restart(char mode, const char *cmd) +void lpc23xx_restart(enum reboot_mode mode, const char *cmd) { switch (mode) { - case 's': - case 'h': + case REBOOT_SOFT: + case REBOOT_HARD: lpc32xx_watchdog_reset(); break; diff --git a/arch/arm/mach-lpc32xx/common.h b/arch/arm/mach-lpc32xx/common.h index e0b2606..1cd8853 100644 --- a/arch/arm/mach-lpc32xx/common.h +++ b/arch/arm/mach-lpc32xx/common.h @@ -21,6 +21,7 @@ #include <mach/board.h> #include <linux/platform_device.h> +#include <linux/reboot.h> /* * Other arch specific structures and functions @@ -29,7 +30,7 @@ extern void lpc32xx_timer_init(void); extern void __init lpc32xx_init_irq(void); extern void __init lpc32xx_map_io(void); extern void __init lpc32xx_serial_init(void); -extern void lpc23xx_restart(char, const char *); +extern void lpc23xx_restart(enum reboot_mode, const char *); /* diff --git a/arch/arm/mach-mmp/common.c b/arch/arm/mach-mmp/common.c index 9292b79..c03b4ab 100644 --- a/arch/arm/mach-mmp/common.c +++ b/arch/arm/mach-mmp/common.c @@ -47,7 +47,7 @@ void __init mmp_map_io(void) mmp_chip_id = __raw_readl(MMP_CHIPID); } -void mmp_restart(char mode, const char *cmd) +void mmp_restart(enum reboot_mode mode, const char *cmd) { soft_restart(0); } diff --git a/arch/arm/mach-mmp/common.h b/arch/arm/mach-mmp/common.h index 0bdc50b..991d7e9 100644 --- a/arch/arm/mach-mmp/common.h +++ b/arch/arm/mach-mmp/common.h @@ -1,10 +1,11 @@ +#include <linux/reboot.h> #define ARRAY_AND_SIZE(x) (x), ARRAY_SIZE(x) extern void timer_init(int irq); extern void __init icu_init_irq(void); extern void __init mmp_map_io(void); -extern void mmp_restart(char, const char *); +extern void mmp_restart(enum reboot_mode, const char *); extern void __init pxa168_clk_init(void); extern void __init pxa910_clk_init(void); extern void __init mmp2_clk_init(void); diff --git a/arch/arm/mach-mmp/include/mach/pxa168.h b/arch/arm/mach-mmp/include/mach/pxa168.h index 7ed1df2..459c2d0 100644 --- a/arch/arm/mach-mmp/include/mach/pxa168.h +++ b/arch/arm/mach-mmp/include/mach/pxa168.h @@ -1,9 +1,11 @@ #ifndef __ASM_MACH_PXA168_H #define __ASM_MACH_PXA168_H +#include <linux/reboot.h> + extern void pxa168_timer_init(void); extern void __init pxa168_init_irq(void); -extern void pxa168_restart(char, const char *); +extern void pxa168_restart(enum reboot_mode, const char *); extern void pxa168_clear_keypad_wakeup(void); #include <linux/i2c.h> diff --git a/arch/arm/mach-mmp/pxa168.c b/arch/arm/mach-mmp/pxa168.c index a30dcf3..144e997 100644 --- a/arch/arm/mach-mmp/pxa168.c +++ b/arch/arm/mach-mmp/pxa168.c @@ -172,7 +172,7 @@ int __init pxa168_add_usb_host(struct mv_usb_platform_data *pdata) return platform_device_register(&pxa168_device_usb_host); } -void pxa168_restart(char mode, const char *cmd) +void pxa168_restart(enum reboot_mode mode, const char *cmd) { soft_restart(0xffff0000); } diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c index 749a7f8..75062ef 100644 --- a/arch/arm/mach-mv78xx0/common.c +++ b/arch/arm/mach-mv78xx0/common.c @@ -413,7 +413,7 @@ void __init mv78xx0_init(void) clk_init(); } -void mv78xx0_restart(char mode, const char *cmd) +void mv78xx0_restart(enum reboot_mode mode, const char *cmd) { /* * Enable soft reset to assert RSTOUTn. diff --git a/arch/arm/mach-mv78xx0/common.h b/arch/arm/mach-mv78xx0/common.h index 5e9485b..6889af26 100644 --- a/arch/arm/mach-mv78xx0/common.h +++ b/arch/arm/mach-mv78xx0/common.h @@ -11,6 +11,8 @@ #ifndef __ARCH_MV78XX0_COMMON_H #define __ARCH_MV78XX0_COMMON_H +#include <linux/reboot.h> + struct mv643xx_eth_platform_data; struct mv_sata_platform_data; @@ -45,7 +47,7 @@ void mv78xx0_uart1_init(void); void mv78xx0_uart2_init(void); void mv78xx0_uart3_init(void); void mv78xx0_i2c_init(void); -void mv78xx0_restart(char, const char *); +void mv78xx0_restart(enum reboot_mode, const char *); extern void mv78xx0_timer_init(void); diff --git a/arch/arm/mach-mvebu/common.h b/arch/arm/mach-mvebu/common.h index 98defd5..e366010 100644 --- a/arch/arm/mach-mvebu/common.h +++ b/arch/arm/mach-mvebu/common.h @@ -17,7 +17,9 @@ #define ARMADA_XP_MAX_CPUS 4 -void mvebu_restart(char mode, const char *cmd); +#include <linux/reboot.h> + +void mvebu_restart(enum reboot_mode mode, const char *cmd); void armada_370_xp_init_irq(void); void armada_370_xp_handle_irq(struct pt_regs *regs); diff --git a/arch/arm/mach-mvebu/system-controller.c b/arch/arm/mach-mvebu/system-controller.c index b8079df..f875124 100644 --- a/arch/arm/mach-mvebu/system-controller.c +++ b/arch/arm/mach-mvebu/system-controller.c @@ -26,6 +26,7 @@ #include <linux/init.h> #include <linux/of_address.h> #include <linux/io.h> +#include <linux/reboot.h> static void __iomem *system_controller_base; @@ -63,7 +64,7 @@ static struct of_device_id of_system_controller_table[] = { { /* end of list */ }, }; -void mvebu_restart(char mode, const char *cmd) +void mvebu_restart(enum reboot_mode mode, const char *cmd) { if (!system_controller_base) { pr_err("Cannot restart, system-controller not available: check the device tree\n"); diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index 7fa611c1b..6298adb 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -20,6 +20,7 @@ #include <linux/gpio.h> #include <linux/init.h> #include <linux/irqchip/mxs.h> +#include <linux/reboot.h> #include <linux/micrel_phy.h> #include <linux/of_address.h> #include <linux/of_platform.h> @@ -500,7 +501,7 @@ static void __init mxs_machine_init(void) /* * Reset the system. It is called by machine_restart(). */ -static void mxs_restart(char mode, const char *cmd) +static void mxs_restart(enum reboot_mode mode, const char *cmd) { struct device_node *np; void __iomem *reset_addr; diff --git a/arch/arm/mach-netx/generic.c b/arch/arm/mach-netx/generic.c index 1504b68..db25b0c 100644 --- a/arch/arm/mach-netx/generic.c +++ b/arch/arm/mach-netx/generic.c @@ -24,6 +24,7 @@ #include <linux/platform_device.h> #include <linux/io.h> #include <linux/irqchip/arm-vic.h> +#include <linux/reboot.h> #include <mach/hardware.h> #include <asm/mach/map.h> #include <mach/netx-regs.h> @@ -187,7 +188,7 @@ static int __init netx_init(void) subsys_initcall(netx_init); -void netx_restart(char mode, const char *cmd) +void netx_restart(enum reboot_mode mode, const char *cmd) { writel(NETX_SYSTEM_RES_CR_FIRMW_RES_EN | NETX_SYSTEM_RES_CR_FIRMW_RES, NETX_SYSTEM_RES_CR); diff --git a/arch/arm/mach-netx/generic.h b/arch/arm/mach-netx/generic.h index 768b26b..bb2ce47 100644 --- a/arch/arm/mach-netx/generic.h +++ b/arch/arm/mach-netx/generic.h @@ -17,8 +17,10 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/reboot.h> + extern void __init netx_map_io(void); extern void __init netx_init_irq(void); -extern void netx_restart(char, const char *); +extern void netx_restart(enum reboot_mode, const char *); extern void netx_timer_init(void); diff --git a/arch/arm/mach-nomadik/cpu-8815.c b/arch/arm/mach-nomadik/cpu-8815.c index 2df209e..13e0df9 100644 --- a/arch/arm/mach-nomadik/cpu-8815.c +++ b/arch/arm/mach-nomadik/cpu-8815.c @@ -103,7 +103,7 @@ static void __init cpu8815_map_io(void) iotable_init(cpu8815_io_desc, ARRAY_SIZE(cpu8815_io_desc)); } -static void cpu8815_restart(char mode, const char *cmd) +static void cpu8815_restart(enum reboot_mode mode, const char *cmd) { void __iomem *srcbase = ioremap(NOMADIK_SRC_BASE, SZ_4K); diff --git a/arch/arm/mach-omap1/board-voiceblue.c b/arch/arm/mach-omap1/board-voiceblue.c index 6c116e1..4677a9c 100644 --- a/arch/arm/mach-omap1/board-voiceblue.c +++ b/arch/arm/mach-omap1/board-voiceblue.c @@ -26,6 +26,7 @@ #include <linux/serial_reg.h> #include <linux/smc91x.h> #include <linux/export.h> +#include <linux/reboot.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> @@ -215,7 +216,7 @@ void voiceblue_wdt_ping(void) gpio_set_value(0, wdt_gpio_state); } -static void voiceblue_restart(char mode, const char *cmd) +static void voiceblue_restart(enum reboot_mode mode, const char *cmd) { /* * Workaround for 5912/1611b bug mentioned in sprz209d.pdf p. 28 diff --git a/arch/arm/mach-omap1/common.h b/arch/arm/mach-omap1/common.h index 14f7e99..abec019 100644 --- a/arch/arm/mach-omap1/common.h +++ b/arch/arm/mach-omap1/common.h @@ -28,6 +28,7 @@ #include <linux/mtd/mtd.h> #include <linux/i2c-omap.h> +#include <linux/reboot.h> #include <plat/i2c.h> @@ -70,7 +71,7 @@ static inline int omap_serial_wakeup_init(void) void omap1_init_early(void); void omap1_init_irq(void); void omap1_init_late(void); -void omap1_restart(char, const char *); +void omap1_restart(enum reboot_mode, const char *); extern void __init omap_check_revision(void); diff --git a/arch/arm/mach-omap1/reset.c b/arch/arm/mach-omap1/reset.c index 5eebd7e..72bf4bf 100644 --- a/arch/arm/mach-omap1/reset.c +++ b/arch/arm/mach-omap1/reset.c @@ -3,6 +3,7 @@ */ #include <linux/kernel.h> #include <linux/io.h> +#include <linux/reboot.h> #include <mach/hardware.h> @@ -22,7 +23,7 @@ #define OMAP_EXTWARM_RST_SRC_ID_SHIFT 5 -void omap1_restart(char mode, const char *cmd) +void omap1_restart(enum reboot_mode mode, const char *cmd) { /* * Workaround for 5912/1611b bug mentioned in sprz209d.pdf p. 28 diff --git a/arch/arm/mach-omap2/am33xx-restart.c b/arch/arm/mach-omap2/am33xx-restart.c index 88e4fa8..1eae962 100644 --- a/arch/arm/mach-omap2/am33xx-restart.c +++ b/arch/arm/mach-omap2/am33xx-restart.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. */ #include <linux/kernel.h> +#include <linux/reboot.h> #include "common.h" #include "prm-regbits-33xx.h" @@ -19,7 +20,7 @@ * Resets the SoC. For @cmd, see the 'reboot' syscall in * kernel/sys.c. No return value. */ -void am33xx_restart(char mode, const char *cmd) +void am33xx_restart(enum reboot_mode mode, const char *cmd) { /* TODO: Handle mode and cmd if necessary */ diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index 72cab3f..dfcc182 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -31,6 +31,7 @@ #include <linux/i2c.h> #include <linux/i2c/twl.h> #include <linux/i2c-omap.h> +#include <linux/reboot.h> #include <asm/proc-fns.h> @@ -119,33 +120,33 @@ static inline void omap_soc_device_init(void) #endif #if defined(CONFIG_SOC_OMAP2420) || defined(CONFIG_SOC_OMAP2430) -void omap2xxx_restart(char mode, const char *cmd); +void omap2xxx_restart(enum reboot_mode mode, const char *cmd); #else -static inline void omap2xxx_restart(char mode, const char *cmd) +static inline void omap2xxx_restart(enum reboot_mode mode, const char *cmd) { } #endif #ifdef CONFIG_SOC_AM33XX -void am33xx_restart(char mode, const char *cmd); +void am33xx_restart(enum reboot_mode mode, const char *cmd); #else -static inline void am33xx_restart(char mode, const char *cmd) +static inline void am33xx_restart(enum reboot_mode mode, const char *cmd) { } #endif #ifdef CONFIG_ARCH_OMAP3 -void omap3xxx_restart(char mode, const char *cmd); +void omap3xxx_restart(enum reboot_mode mode, const char *cmd); #else -static inline void omap3xxx_restart(char mode, const char *cmd) +static inline void omap3xxx_restart(enum reboot_mode mode, const char *cmd) { } #endif #if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5) -void omap44xx_restart(char mode, const char *cmd); +void omap44xx_restart(enum reboot_mode mode, const char *cmd); #else -static inline void omap44xx_restart(char mode, const char *cmd) +static inline void omap44xx_restart(enum reboot_mode mode, const char *cmd) { } #endif diff --git a/arch/arm/mach-omap2/omap2-restart.c b/arch/arm/mach-omap2/omap2-restart.c index 719b716..68423e2 100644 --- a/arch/arm/mach-omap2/omap2-restart.c +++ b/arch/arm/mach-omap2/omap2-restart.c @@ -31,7 +31,7 @@ static struct clk *reset_virt_prcm_set_ck, *reset_sys_ck; * Set the DPLL to bypass so that reboot completes successfully. No * return value. */ -void omap2xxx_restart(char mode, const char *cmd) +void omap2xxx_restart(enum reboot_mode mode, const char *cmd) { u32 rate; diff --git a/arch/arm/mach-omap2/omap3-restart.c b/arch/arm/mach-omap2/omap3-restart.c index 923c582..5de2a0c 100644 --- a/arch/arm/mach-omap2/omap3-restart.c +++ b/arch/arm/mach-omap2/omap3-restart.c @@ -12,6 +12,7 @@ */ #include <linux/kernel.h> #include <linux/init.h> +#include <linux/reboot.h> #include "iomap.h" #include "common.h" @@ -28,7 +29,7 @@ * Resets the SoC. For @cmd, see the 'reboot' syscall in * kernel/sys.c. No return value. */ -void omap3xxx_restart(char mode, const char *cmd) +void omap3xxx_restart(enum reboot_mode mode, const char *cmd) { omap3_ctrl_write_boot_mode((cmd ? (u8)*cmd : 0)); omap3xxx_prm_dpll3_reset(); /* never returns */ diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 38cd3a6..5791143 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -23,6 +23,7 @@ #include <linux/export.h> #include <linux/irqchip/arm-gic.h> #include <linux/of_address.h> +#include <linux/reboot.h> #include <asm/hardware/cache-l2x0.h> #include <asm/mach/map.h> diff --git a/arch/arm/mach-omap2/omap4-restart.c b/arch/arm/mach-omap2/omap4-restart.c index f90e02e..41dfd7d 100644 --- a/arch/arm/mach-omap2/omap4-restart.c +++ b/arch/arm/mach-omap2/omap4-restart.c @@ -8,6 +8,7 @@ */ #include <linux/types.h> +#include <linux/reboot.h> #include "prminst44xx.h" /** @@ -18,7 +19,7 @@ * Resets the SoC. For @cmd, see the 'reboot' syscall in * kernel/sys.c. No return value. */ -void omap44xx_restart(char mode, const char *cmd) +void omap44xx_restart(enum reboot_mode mode, const char *cmd) { /* XXX Should save 'cmd' into scratchpad for use after reboot */ omap4_prminst_global_warm_sw_reset(); /* never returns */ diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index f8a6db9..b41599f 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -347,7 +347,7 @@ void __init orion5x_init(void) orion5x_wdt_init(); } -void orion5x_restart(char mode, const char *cmd) +void orion5x_restart(enum reboot_mode mode, const char *cmd) { /* * Enable and issue soft reset diff --git a/arch/arm/mach-orion5x/common.h b/arch/arm/mach-orion5x/common.h index cdaa01f..a909afb 100644 --- a/arch/arm/mach-orion5x/common.h +++ b/arch/arm/mach-orion5x/common.h @@ -1,6 +1,8 @@ #ifndef __ARCH_ORION5X_COMMON_H #define __ARCH_ORION5X_COMMON_H +#include <linux/reboot.h> + struct dsa_platform_data; struct mv643xx_eth_platform_data; struct mv_sata_platform_data; @@ -29,7 +31,7 @@ void orion5x_spi_init(void); void orion5x_uart0_init(void); void orion5x_uart1_init(void); void orion5x_xor_init(void); -void orion5x_restart(char, const char *); +void orion5x_restart(enum reboot_mode, const char *); /* * PCIe/PCI functions. diff --git a/arch/arm/mach-orion5x/ls-chl-setup.c b/arch/arm/mach-orion5x/ls-chl-setup.c index 24f4e14..6234977 100644 --- a/arch/arm/mach-orion5x/ls-chl-setup.c +++ b/arch/arm/mach-orion5x/ls-chl-setup.c @@ -139,7 +139,7 @@ static struct mv_sata_platform_data lschl_sata_data = { static void lschl_power_off(void) { - orion5x_restart('h', NULL); + orion5x_restart(REBOOT_HARD, NULL); } /***************************************************************************** diff --git a/arch/arm/mach-orion5x/ls_hgl-setup.c b/arch/arm/mach-orion5x/ls_hgl-setup.c index fc653bb..fe04c4b 100644 --- a/arch/arm/mach-orion5x/ls_hgl-setup.c +++ b/arch/arm/mach-orion5x/ls_hgl-setup.c @@ -185,7 +185,7 @@ static struct mv_sata_platform_data ls_hgl_sata_data = { static void ls_hgl_power_off(void) { - orion5x_restart('h', NULL); + orion5x_restart(REBOOT_HARD, NULL); } diff --git a/arch/arm/mach-orion5x/lsmini-setup.c b/arch/arm/mach-orion5x/lsmini-setup.c index 18e66e6..ca4dbe9 100644 --- a/arch/arm/mach-orion5x/lsmini-setup.c +++ b/arch/arm/mach-orion5x/lsmini-setup.c @@ -185,7 +185,7 @@ static struct mv_sata_platform_data lsmini_sata_data = { static void lsmini_power_off(void) { - orion5x_restart('h', NULL); + orion5x_restart(REBOOT_HARD, NULL); } diff --git a/arch/arm/mach-picoxcell/common.c b/arch/arm/mach-picoxcell/common.c index b13f51b..ec79fea 100644 --- a/arch/arm/mach-picoxcell/common.c +++ b/arch/arm/mach-picoxcell/common.c @@ -11,6 +11,7 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_platform.h> +#include <linux/reboot.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -63,7 +64,7 @@ static const char *picoxcell_dt_match[] = { NULL }; -static void picoxcell_wdt_restart(char mode, const char *cmd) +static void picoxcell_wdt_restart(enum reboot_mode mode, const char *cmd) { /* * Configure the watchdog to reset with the shortest possible timeout diff --git a/arch/arm/mach-prima2/common.h b/arch/arm/mach-prima2/common.h index 81135cd..a630485 100644 --- a/arch/arm/mach-prima2/common.h +++ b/arch/arm/mach-prima2/common.h @@ -10,6 +10,8 @@ #define __MACH_PRIMA2_COMMON_H__ #include <linux/init.h> +#include <linux/reboot.h> + #include <asm/mach/time.h> #include <asm/exception.h> @@ -22,7 +24,7 @@ extern void sirfsoc_cpu_die(unsigned int cpu); extern void __init sirfsoc_of_irq_init(void); extern void __init sirfsoc_of_clk_init(void); -extern void sirfsoc_restart(char, const char *); +extern void sirfsoc_restart(enum reboot_mode, const char *); extern asmlinkage void __exception_irq_entry sirfsoc_handle_irq(struct pt_regs *regs); #ifndef CONFIG_DEBUG_LL diff --git a/arch/arm/mach-prima2/rstc.c b/arch/arm/mach-prima2/rstc.c index d5e0cbc..ccb5339 100644 --- a/arch/arm/mach-prima2/rstc.c +++ b/arch/arm/mach-prima2/rstc.c @@ -13,6 +13,7 @@ #include <linux/device.h> #include <linux/of.h> #include <linux/of_address.h> +#include <linux/reboot.h> void __iomem *sirfsoc_rstc_base; static DEFINE_MUTEX(rstc_lock); @@ -84,7 +85,7 @@ int sirfsoc_reset_device(struct device *dev) #define SIRFSOC_SYS_RST_BIT BIT(31) -void sirfsoc_restart(char mode, const char *cmd) +void sirfsoc_restart(enum reboot_mode mode, const char *cmd) { writel(SIRFSOC_SYS_RST_BIT, sirfsoc_rstc_base); } diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index a5b8fead..f162f1b 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -663,16 +663,16 @@ static void corgi_poweroff(void) /* Green LED off tells the bootloader to halt */ gpio_set_value(CORGI_GPIO_LED_GREEN, 0); - pxa_restart('h', NULL); + pxa_restart(REBOOT_HARD, NULL); } -static void corgi_restart(char mode, const char *cmd) +static void corgi_restart(enum reboot_mode mode, const char *cmd) { if (!machine_is_corgi()) /* Green LED on tells the bootloader to reboot */ gpio_set_value(CORGI_GPIO_LED_GREEN, 1); - pxa_restart('h', cmd); + pxa_restart(REBOOT_HARD, cmd); } static void __init corgi_init(void) diff --git a/arch/arm/mach-pxa/generic.h b/arch/arm/mach-pxa/generic.h index fd7ea39..8963984 100644 --- a/arch/arm/mach-pxa/generic.h +++ b/arch/arm/mach-pxa/generic.h @@ -9,6 +9,8 @@ * published by the Free Software Foundation. */ +#include <linux/reboot.h> + struct irq_data; extern void pxa_timer_init(void); @@ -56,4 +58,4 @@ void __init pxa_set_btuart_info(void *info); void __init pxa_set_stuart_info(void *info); void __init pxa_set_hwuart_info(void *info); -void pxa_restart(char, const char *); +void pxa_restart(enum reboot_mode, const char *); diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c index 654b0ac..acc9d3c 100644 --- a/arch/arm/mach-pxa/mioa701.c +++ b/arch/arm/mach-pxa/mioa701.c @@ -37,6 +37,7 @@ #include <linux/wm97xx.h> #include <linux/mtd/physmap.h> #include <linux/usb/gpio_vbus.h> +#include <linux/reboot.h> #include <linux/regulator/max1586.h> #include <linux/slab.h> #include <linux/i2c/pxa-i2c.h> @@ -696,13 +697,13 @@ static void mioa701_machine_exit(void); static void mioa701_poweroff(void) { mioa701_machine_exit(); - pxa_restart('s', NULL); + pxa_restart(REBOOT_SOFT, NULL); } -static void mioa701_restart(char c, const char *cmd) +static void mioa701_restart(enum reboot_mode c, const char *cmd) { mioa701_machine_exit(); - pxa_restart('s', cmd); + pxa_restart(REBOOT_SOFT, cmd); } static struct gpio global_gpios[] = { @@ -761,7 +762,6 @@ static void mioa701_machine_exit(void) MACHINE_START(MIOA701, "MIO A701") .atag_offset = 0x100, - .restart_mode = 's', .map_io = &pxa27x_map_io, .nr_irqs = PXA_NR_IRQS, .init_irq = &pxa27x_init_irq, diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index 50ccd5f..711d37e 100644 --- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -422,7 +422,7 @@ static struct i2c_board_info __initdata poodle_i2c_devices[] = { static void poodle_poweroff(void) { - pxa_restart('h', NULL); + pxa_restart(REBOOT_HARD, NULL); } static void __init poodle_init(void) diff --git a/arch/arm/mach-pxa/reset.c b/arch/arm/mach-pxa/reset.c index 3fab583..0d5dd64 100644 --- a/arch/arm/mach-pxa/reset.c +++ b/arch/arm/mach-pxa/reset.c @@ -83,7 +83,7 @@ static void do_hw_reset(void) writel_relaxed(readl_relaxed(OSCR) + 368640, OSMR3); } -void pxa_restart(char mode, const char *cmd) +void pxa_restart(enum reboot_mode mode, const char *cmd) { local_irq_disable(); local_fiq_disable(); @@ -91,14 +91,14 @@ void pxa_restart(char mode, const char *cmd) clear_reset_status(RESET_STATUS_ALL); switch (mode) { - case 's': + case REBOOT_SOFT: /* Jump into ROM at address 0 */ soft_restart(0); break; - case 'g': + case REBOOT_GPIO: do_gpio_reset(); break; - case 'h': + case REBOOT_HARD: default: do_hw_reset(); break; diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 362726c..2125df0 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -31,6 +31,7 @@ #include <linux/regulator/machine.h> #include <linux/io.h> #include <linux/module.h> +#include <linux/reboot.h> #include <asm/setup.h> #include <asm/mach-types.h> @@ -924,10 +925,10 @@ static inline void spitz_i2c_init(void) {} ******************************************************************************/ static void spitz_poweroff(void) { - pxa_restart('g', NULL); + pxa_restart(REBOOT_GPIO, NULL); } -static void spitz_restart(char mode, const char *cmd) +static void spitz_restart(enum reboot_mode mode, const char *cmd) { uint32_t msc0 = __raw_readl(MSC0); /* Bootloader magic for a reboot */ @@ -979,7 +980,6 @@ static void __init spitz_fixup(struct tag *tags, char **cmdline, #ifdef CONFIG_MACH_SPITZ MACHINE_START(SPITZ, "SHARP Spitz") - .restart_mode = 'g', .fixup = spitz_fixup, .map_io = pxa27x_map_io, .nr_irqs = PXA_NR_IRQS, @@ -993,7 +993,6 @@ MACHINE_END #ifdef CONFIG_MACH_BORZOI MACHINE_START(BORZOI, "SHARP Borzoi") - .restart_mode = 'g', .fixup = spitz_fixup, .map_io = pxa27x_map_io, .nr_irqs = PXA_NR_IRQS, @@ -1007,7 +1006,6 @@ MACHINE_END #ifdef CONFIG_MACH_AKITA MACHINE_START(AKITA, "SHARP Akita") - .restart_mode = 'g', .fixup = spitz_fixup, .map_io = pxa27x_map_io, .nr_irqs = PXA_NR_IRQS, diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c index 3d91d2e..0206b91 100644 --- a/arch/arm/mach-pxa/tosa.c +++ b/arch/arm/mach-pxa/tosa.c @@ -36,6 +36,7 @@ #include <linux/input/matrix_keypad.h> #include <linux/i2c/pxa-i2c.h> #include <linux/usb/gpio_vbus.h> +#include <linux/reboot.h> #include <asm/setup.h> #include <asm/mach-types.h> @@ -911,10 +912,10 @@ static struct platform_device *devices[] __initdata = { static void tosa_poweroff(void) { - pxa_restart('g', NULL); + pxa_restart(REBOOT_GPIO, NULL); } -static void tosa_restart(char mode, const char *cmd) +static void tosa_restart(enum reboot_mode mode, const char *cmd) { uint32_t msc0 = __raw_readl(MSC0); @@ -969,7 +970,6 @@ static void __init fixup_tosa(struct tag *tags, char **cmdline, } MACHINE_START(TOSA, "SHARP Tosa") - .restart_mode = 'g', .fixup = fixup_tosa, .map_io = pxa25x_map_io, .nr_irqs = TOSA_NR_IRQS, diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c index 5b1c8bf..c85ddb2 100644 --- a/arch/arm/mach-realview/realview_eb.c +++ b/arch/arm/mach-realview/realview_eb.c @@ -29,6 +29,7 @@ #include <linux/io.h> #include <linux/irqchip/arm-gic.h> #include <linux/platform_data/clk-realview.h> +#include <linux/reboot.h> #include <mach/hardware.h> #include <asm/irq.h> @@ -418,7 +419,7 @@ static void __init realview_eb_timer_init(void) realview_eb_twd_init(); } -static void realview_eb_restart(char mode, const char *cmd) +static void realview_eb_restart(enum reboot_mode mode, const char *cmd) { void __iomem *reset_ctrl = __io_address(REALVIEW_SYS_RESETCTL); void __iomem *lock_ctrl = __io_address(REALVIEW_SYS_LOCK); diff --git a/arch/arm/mach-realview/realview_pb1176.c b/arch/arm/mach-realview/realview_pb1176.c index d5e83a1..c5eade7 100644 --- a/arch/arm/mach-realview/realview_pb1176.c +++ b/arch/arm/mach-realview/realview_pb1176.c @@ -31,6 +31,7 @@ #include <linux/io.h> #include <linux/irqchip/arm-gic.h> #include <linux/platform_data/clk-realview.h> +#include <linux/reboot.h> #include <mach/hardware.h> #include <asm/irq.h> @@ -329,7 +330,7 @@ static void __init realview_pb1176_timer_init(void) realview_timer_init(IRQ_DC1176_TIMER0); } -static void realview_pb1176_restart(char mode, const char *cmd) +static void realview_pb1176_restart(enum reboot_mode mode, const char *cmd) { void __iomem *reset_ctrl = __io_address(REALVIEW_SYS_RESETCTL); void __iomem *lock_ctrl = __io_address(REALVIEW_SYS_LOCK); diff --git a/arch/arm/mach-realview/realview_pb11mp.c b/arch/arm/mach-realview/realview_pb11mp.c index c3cfe21..f4b0962 100644 --- a/arch/arm/mach-realview/realview_pb11mp.c +++ b/arch/arm/mach-realview/realview_pb11mp.c @@ -29,6 +29,7 @@ #include <linux/io.h> #include <linux/irqchip/arm-gic.h> #include <linux/platform_data/clk-realview.h> +#include <linux/reboot.h> #include <mach/hardware.h> #include <asm/irq.h> @@ -316,7 +317,7 @@ static void __init realview_pb11mp_timer_init(void) realview_pb11mp_twd_init(); } -static void realview_pb11mp_restart(char mode, const char *cmd) +static void realview_pb11mp_restart(enum reboot_mode mode, const char *cmd) { void __iomem *reset_ctrl = __io_address(REALVIEW_SYS_RESETCTL); void __iomem *lock_ctrl = __io_address(REALVIEW_SYS_LOCK); diff --git a/arch/arm/mach-realview/realview_pba8.c b/arch/arm/mach-realview/realview_pba8.c index dde652a..10a3e1d 100644 --- a/arch/arm/mach-realview/realview_pba8.c +++ b/arch/arm/mach-realview/realview_pba8.c @@ -29,6 +29,7 @@ #include <linux/io.h> #include <linux/irqchip/arm-gic.h> #include <linux/platform_data/clk-realview.h> +#include <linux/reboot.h> #include <asm/irq.h> #include <asm/mach-types.h> @@ -264,7 +265,7 @@ static void __init realview_pba8_timer_init(void) realview_timer_init(IRQ_PBA8_TIMER0_1); } -static void realview_pba8_restart(char mode, const char *cmd) +static void realview_pba8_restart(enum reboot_mode mode, const char *cmd) { void __iomem *reset_ctrl = __io_address(REALVIEW_SYS_RESETCTL); void __iomem *lock_ctrl = __io_address(REALVIEW_SYS_LOCK); diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c index 54f0185..9d75493 100644 --- a/arch/arm/mach-realview/realview_pbx.c +++ b/arch/arm/mach-realview/realview_pbx.c @@ -28,6 +28,7 @@ #include <linux/io.h> #include <linux/irqchip/arm-gic.h> #include <linux/platform_data/clk-realview.h> +#include <linux/reboot.h> #include <asm/irq.h> #include <asm/mach-types.h> @@ -344,7 +345,7 @@ static void realview_pbx_fixup(struct tag *tags, char **from, #endif } -static void realview_pbx_restart(char mode, const char *cmd) +static void realview_pbx_restart(enum reboot_mode mode, const char *cmd) { void __iomem *reset_ctrl = __io_address(REALVIEW_SYS_RESETCTL); void __iomem *lock_ctrl = __io_address(REALVIEW_SYS_LOCK); diff --git a/arch/arm/mach-rpc/riscpc.c b/arch/arm/mach-rpc/riscpc.c index a302cf5..09d602b 100644 --- a/arch/arm/mach-rpc/riscpc.c +++ b/arch/arm/mach-rpc/riscpc.c @@ -20,6 +20,7 @@ #include <linux/ata_platform.h> #include <linux/io.h> #include <linux/i2c.h> +#include <linux/reboot.h> #include <asm/elf.h> #include <asm/mach-types.h> @@ -201,7 +202,7 @@ static int __init rpc_init(void) arch_initcall(rpc_init); -static void rpc_restart(char mode, const char *cmd) +static void rpc_restart(enum reboot_mode mode, const char *cmd) { iomd_writeb(0, IOMD_ROMCR0); diff --git a/arch/arm/mach-s3c24xx/common.h b/arch/arm/mach-s3c24xx/common.h index 307c371..84b2806 100644 --- a/arch/arm/mach-s3c24xx/common.h +++ b/arch/arm/mach-s3c24xx/common.h @@ -12,6 +12,8 @@ #ifndef __ARCH_ARM_MACH_S3C24XX_COMMON_H #define __ARCH_ARM_MACH_S3C24XX_COMMON_H __FILE__ +#include <linux/reboot.h> + struct s3c2410_uartcfg; #ifdef CONFIG_CPU_S3C2410 @@ -20,7 +22,7 @@ extern int s3c2410a_init(void); extern void s3c2410_map_io(void); extern void s3c2410_init_uarts(struct s3c2410_uartcfg *cfg, int no); extern void s3c2410_init_clocks(int xtal); -extern void s3c2410_restart(char mode, const char *cmd); +extern void s3c2410_restart(enum reboot_mode mode, const char *cmd); extern void s3c2410_init_irq(void); #else #define s3c2410_init_clocks NULL @@ -36,7 +38,7 @@ extern void s3c2412_map_io(void); extern void s3c2412_init_uarts(struct s3c2410_uartcfg *cfg, int no); extern void s3c2412_init_clocks(int xtal); extern int s3c2412_baseclk_add(void); -extern void s3c2412_restart(char mode, const char *cmd); +extern void s3c2412_restart(enum reboot_mode mode, const char *cmd); extern void s3c2412_init_irq(void); #else #define s3c2412_init_clocks NULL @@ -51,7 +53,7 @@ extern void s3c2416_map_io(void); extern void s3c2416_init_uarts(struct s3c2410_uartcfg *cfg, int no); extern void s3c2416_init_clocks(int xtal); extern int s3c2416_baseclk_add(void); -extern void s3c2416_restart(char mode, const char *cmd); +extern void s3c2416_restart(enum reboot_mode mode, const char *cmd); extern void s3c2416_init_irq(void); extern struct syscore_ops s3c2416_irq_syscore_ops; @@ -66,7 +68,7 @@ extern struct syscore_ops s3c2416_irq_syscore_ops; extern void s3c244x_map_io(void); extern void s3c244x_init_uarts(struct s3c2410_uartcfg *cfg, int no); extern void s3c244x_init_clocks(int xtal); -extern void s3c244x_restart(char mode, const char *cmd); +extern void s3c244x_restart(enum reboot_mode mode, const char *cmd); #else #define s3c244x_init_clocks NULL #define s3c244x_init_uarts NULL @@ -96,7 +98,7 @@ extern void s3c2443_map_io(void); extern void s3c2443_init_uarts(struct s3c2410_uartcfg *cfg, int no); extern void s3c2443_init_clocks(int xtal); extern int s3c2443_baseclk_add(void); -extern void s3c2443_restart(char mode, const char *cmd); +extern void s3c2443_restart(enum reboot_mode mode, const char *cmd); extern void s3c2443_init_irq(void); #else #define s3c2443_init_clocks NULL diff --git a/arch/arm/mach-s3c24xx/s3c2410.c b/arch/arm/mach-s3c24xx/s3c2410.c index ff384ac..34676d1 100644 --- a/arch/arm/mach-s3c24xx/s3c2410.c +++ b/arch/arm/mach-s3c24xx/s3c2410.c @@ -22,6 +22,7 @@ #include <linux/syscore_ops.h> #include <linux/serial_core.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <linux/io.h> #include <asm/mach/arch.h> @@ -196,9 +197,9 @@ int __init s3c2410a_init(void) return s3c2410_init(); } -void s3c2410_restart(char mode, const char *cmd) +void s3c2410_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') { + if (mode == REBOOT_SOFT) { soft_restart(0); } diff --git a/arch/arm/mach-s3c24xx/s3c2412.c b/arch/arm/mach-s3c24xx/s3c2412.c index 0f864d4..0251650c 100644 --- a/arch/arm/mach-s3c24xx/s3c2412.c +++ b/arch/arm/mach-s3c24xx/s3c2412.c @@ -22,6 +22,7 @@ #include <linux/serial_core.h> #include <linux/platform_device.h> #include <linux/io.h> +#include <linux/reboot.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -129,9 +130,9 @@ static void s3c2412_idle(void) cpu_do_idle(); } -void s3c2412_restart(char mode, const char *cmd) +void s3c2412_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') + if (mode == REBOOT_SOFT) soft_restart(0); /* errata "Watch-dog/Software Reset Problem" specifies that diff --git a/arch/arm/mach-s3c24xx/s3c2416.c b/arch/arm/mach-s3c24xx/s3c2416.c index b9c5d38..9ef3ccf 100644 --- a/arch/arm/mach-s3c24xx/s3c2416.c +++ b/arch/arm/mach-s3c24xx/s3c2416.c @@ -35,6 +35,7 @@ #include <linux/syscore_ops.h> #include <linux/clk.h> #include <linux/io.h> +#include <linux/reboot.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -79,9 +80,9 @@ static struct device s3c2416_dev = { .bus = &s3c2416_subsys, }; -void s3c2416_restart(char mode, const char *cmd) +void s3c2416_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') + if (mode == REBOOT_SOFT) soft_restart(0); __raw_writel(S3C2443_SWRST_RESET, S3C2443_SWRST); diff --git a/arch/arm/mach-s3c24xx/s3c2443.c b/arch/arm/mach-s3c24xx/s3c2443.c index 8328cd6..b6c7191 100644 --- a/arch/arm/mach-s3c24xx/s3c2443.c +++ b/arch/arm/mach-s3c24xx/s3c2443.c @@ -22,6 +22,7 @@ #include <linux/device.h> #include <linux/clk.h> #include <linux/io.h> +#include <linux/reboot.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -59,9 +60,9 @@ static struct device s3c2443_dev = { .bus = &s3c2443_subsys, }; -void s3c2443_restart(char mode, const char *cmd) +void s3c2443_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') + if (mode == REBOOT_SOFT) soft_restart(0); __raw_writel(S3C2443_SWRST_RESET, S3C2443_SWRST); diff --git a/arch/arm/mach-s3c24xx/s3c244x.c b/arch/arm/mach-s3c24xx/s3c244x.c index d0423e2..911b555 100644 --- a/arch/arm/mach-s3c24xx/s3c244x.c +++ b/arch/arm/mach-s3c24xx/s3c244x.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/serial_core.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <linux/device.h> #include <linux/syscore_ops.h> #include <linux/clk.h> @@ -198,9 +199,9 @@ struct syscore_ops s3c244x_pm_syscore_ops = { .resume = s3c244x_resume, }; -void s3c244x_restart(char mode, const char *cmd) +void s3c244x_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') + if (mode == REBOOT_SOFT) soft_restart(0); samsung_wdt_reset(); diff --git a/arch/arm/mach-s3c64xx/common.c b/arch/arm/mach-s3c64xx/common.c index 1aed6f4..3f62e46 100644 --- a/arch/arm/mach-s3c64xx/common.c +++ b/arch/arm/mach-s3c64xx/common.c @@ -21,6 +21,7 @@ #include <linux/ioport.h> #include <linux/serial_core.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <linux/io.h> #include <linux/dma-mapping.h> #include <linux/irq.h> @@ -381,9 +382,9 @@ static int __init s3c64xx_init_irq_eint(void) } arch_initcall(s3c64xx_init_irq_eint); -void s3c64xx_restart(char mode, const char *cmd) +void s3c64xx_restart(enum reboot_mode mode, const char *cmd) { - if (mode != 's') + if (mode != REBOOT_SOFT) samsung_wdt_reset(); /* if all else fails, or mode was for soft, jump to 0 */ diff --git a/arch/arm/mach-s3c64xx/common.h b/arch/arm/mach-s3c64xx/common.h index 6cfc99b..e8f990b 100644 --- a/arch/arm/mach-s3c64xx/common.h +++ b/arch/arm/mach-s3c64xx/common.h @@ -17,13 +17,15 @@ #ifndef __ARCH_ARM_MACH_S3C64XX_COMMON_H #define __ARCH_ARM_MACH_S3C64XX_COMMON_H +#include <linux/reboot.h> + void s3c64xx_init_irq(u32 vic0, u32 vic1); void s3c64xx_init_io(struct map_desc *mach_desc, int size); void s3c64xx_register_clocks(unsigned long xtal, unsigned armclk_limit); void s3c64xx_setup_clocks(void); -void s3c64xx_restart(char mode, const char *cmd); +void s3c64xx_restart(enum reboot_mode mode, const char *cmd); void s3c64xx_init_late(void); #ifdef CONFIG_CPU_S3C6400 diff --git a/arch/arm/mach-s5p64x0/common.c b/arch/arm/mach-s5p64x0/common.c index 76d0053..dfdfdc3 100644 --- a/arch/arm/mach-s5p64x0/common.c +++ b/arch/arm/mach-s5p64x0/common.c @@ -24,6 +24,7 @@ #include <linux/dma-mapping.h> #include <linux/gpio.h> #include <linux/irq.h> +#include <linux/reboot.h> #include <asm/irq.h> #include <asm/proc-fns.h> @@ -439,9 +440,9 @@ static int __init s5p64x0_init_irq_eint(void) } arch_initcall(s5p64x0_init_irq_eint); -void s5p64x0_restart(char mode, const char *cmd) +void s5p64x0_restart(enum reboot_mode mode, const char *cmd) { - if (mode != 's') + if (mode != REBOOT_SOFT) samsung_wdt_reset(); soft_restart(0); diff --git a/arch/arm/mach-s5p64x0/common.h b/arch/arm/mach-s5p64x0/common.h index f8a60fd..f3a9b43 100644 --- a/arch/arm/mach-s5p64x0/common.h +++ b/arch/arm/mach-s5p64x0/common.h @@ -12,6 +12,8 @@ #ifndef __ARCH_ARM_MACH_S5P64X0_COMMON_H #define __ARCH_ARM_MACH_S5P64X0_COMMON_H +#include <linux/reboot.h> + void s5p6440_init_irq(void); void s5p6450_init_irq(void); void s5p64x0_init_io(struct map_desc *mach_desc, int size); @@ -22,7 +24,7 @@ void s5p6440_setup_clocks(void); void s5p6450_register_clocks(void); void s5p6450_setup_clocks(void); -void s5p64x0_restart(char mode, const char *cmd); +void s5p64x0_restart(enum reboot_mode mode, const char *cmd); #ifdef CONFIG_CPU_S5P6440 diff --git a/arch/arm/mach-s5pc100/common.c b/arch/arm/mach-s5pc100/common.c index 5110315..4bdfecf 100644 --- a/arch/arm/mach-s5pc100/common.c +++ b/arch/arm/mach-s5pc100/common.c @@ -24,6 +24,7 @@ #include <linux/serial_core.h> #include <linux/platform_device.h> #include <linux/sched.h> +#include <linux/reboot.h> #include <asm/irq.h> #include <asm/proc-fns.h> @@ -217,9 +218,9 @@ void __init s5pc100_init_uarts(struct s3c2410_uartcfg *cfg, int no) s3c24xx_init_uartdevs("s3c6400-uart", s5p_uart_resources, cfg, no); } -void s5pc100_restart(char mode, const char *cmd) +void s5pc100_restart(enum reboot_mode mode, const char *cmd) { - if (mode != 's') + if (mode != REBOOT_SOFT) samsung_wdt_reset(); soft_restart(0); diff --git a/arch/arm/mach-s5pc100/common.h b/arch/arm/mach-s5pc100/common.h index c41f912..08d782d 100644 --- a/arch/arm/mach-s5pc100/common.h +++ b/arch/arm/mach-s5pc100/common.h @@ -12,13 +12,15 @@ #ifndef __ARCH_ARM_MACH_S5PC100_COMMON_H #define __ARCH_ARM_MACH_S5PC100_COMMON_H +#include <linux/reboot.h> + void s5pc100_init_io(struct map_desc *mach_desc, int size); void s5pc100_init_irq(void); void s5pc100_register_clocks(void); void s5pc100_setup_clocks(void); -void s5pc100_restart(char mode, const char *cmd); +void s5pc100_restart(enum reboot_mode mode, const char *cmd); extern int s5pc100_init(void); extern void s5pc100_map_io(void); diff --git a/arch/arm/mach-s5pv210/common.c b/arch/arm/mach-s5pv210/common.c index 9dfe93e..023f1a7 100644 --- a/arch/arm/mach-s5pv210/common.c +++ b/arch/arm/mach-s5pv210/common.c @@ -143,7 +143,7 @@ static struct map_desc s5pv210_iodesc[] __initdata = { } }; -void s5pv210_restart(char mode, const char *cmd) +void s5pv210_restart(enum reboot_mode mode, const char *cmd) { __raw_writel(0x1, S5P_SWRESET); } diff --git a/arch/arm/mach-s5pv210/common.h b/arch/arm/mach-s5pv210/common.h index 0a1cc0ae..fe1beb5 100644 --- a/arch/arm/mach-s5pv210/common.h +++ b/arch/arm/mach-s5pv210/common.h @@ -12,13 +12,15 @@ #ifndef __ARCH_ARM_MACH_S5PV210_COMMON_H #define __ARCH_ARM_MACH_S5PV210_COMMON_H +#include <linux/reboot.h> + void s5pv210_init_io(struct map_desc *mach_desc, int size); void s5pv210_init_irq(void); void s5pv210_register_clocks(void); void s5pv210_setup_clocks(void); -void s5pv210_restart(char mode, const char *cmd); +void s5pv210_restart(enum reboot_mode mode, const char *cmd); extern int s5pv210_init(void); extern void s5pv210_map_io(void); diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 9db3e98..f25b611 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -19,6 +19,7 @@ #include <linux/cpufreq.h> #include <linux/ioport.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <video/sa1100fb.h> @@ -131,9 +132,9 @@ static void sa1100_power_off(void) PMCR = PMCR_SF; } -void sa11x0_restart(char mode, const char *cmd) +void sa11x0_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') { + if (mode == REBOOT_SOFT) { /* Jump into ROM at address 0 */ soft_restart(0); } else { diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h index 2abc6a1..9a33695 100644 --- a/arch/arm/mach-sa1100/generic.h +++ b/arch/arm/mach-sa1100/generic.h @@ -3,12 +3,13 @@ * * Author: Nicolas Pitre */ +#include <linux/reboot.h> extern void sa1100_timer_init(void); extern void __init sa1100_map_io(void); extern void __init sa1100_init_irq(void); extern void __init sa1100_init_gpio(void); -extern void sa11x0_restart(char, const char *); +extern void sa11x0_restart(enum reboot_mode, const char *); extern void sa11x0_init_late(void); #define SET_BANK(__nr,__start,__size) \ diff --git a/arch/arm/mach-shark/core.c b/arch/arm/mach-shark/core.c index 1535557..1d32c5e 100644 --- a/arch/arm/mach-shark/core.c +++ b/arch/arm/mach-shark/core.c @@ -11,6 +11,7 @@ #include <linux/serial_8250.h> #include <linux/io.h> #include <linux/cpu.h> +#include <linux/reboot.h> #include <asm/setup.h> #include <asm/mach-types.h> @@ -24,7 +25,7 @@ #define ROMCARD_SIZE 0x08000000 #define ROMCARD_START 0x10000000 -static void shark_restart(char mode, const char *cmd) +static void shark_restart(enum reboot_mode mode, const char *cmd) { short temp; /* Reset the Machine via pc[3] of the sequoia chipset */ diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c index 44a6215..45221fd 100644 --- a/arch/arm/mach-shmobile/board-armadillo800eva.c +++ b/arch/arm/mach-shmobile/board-armadillo800eva.c @@ -42,6 +42,7 @@ #include <linux/mmc/sh_mmcif.h> #include <linux/mmc/sh_mobile_sdhi.h> #include <linux/i2c-gpio.h> +#include <linux/reboot.h> #include <mach/common.h> #include <mach/irqs.h> #include <mach/r8a7740.h> @@ -1259,7 +1260,7 @@ static void __init eva_add_early_devices(void) } #define RESCNT2 IOMEM(0xe6188020) -static void eva_restart(char mode, const char *cmd) +static void eva_restart(enum reboot_mode mode, const char *cmd) { /* Do soft power on reset */ writel((1 << 31), RESCNT2); diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c index 165483c9..1068120 100644 --- a/arch/arm/mach-shmobile/board-kzm9g.c +++ b/arch/arm/mach-shmobile/board-kzm9g.c @@ -34,6 +34,7 @@ #include <linux/pinctrl/machine.h> #include <linux/pinctrl/pinconf-generic.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <linux/regulator/fixed.h> #include <linux/regulator/machine.h> #include <linux/smsc911x.h> @@ -890,7 +891,7 @@ static void __init kzm_init(void) sh73a0_pm_init(); } -static void kzm9g_restart(char mode, const char *cmd) +static void kzm9g_restart(enum reboot_mode mode, const char *cmd) { #define RESCNT2 IOMEM(0xe6188020) /* Do soft power on reset */ diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c index 8ea11b47..bfce964 100644 --- a/arch/arm/mach-socfpga/socfpga.c +++ b/arch/arm/mach-socfpga/socfpga.c @@ -19,6 +19,7 @@ #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/of_platform.h> +#include <linux/reboot.h> #include <asm/hardware/cache-l2x0.h> #include <asm/mach/arch.h> @@ -89,13 +90,13 @@ static void __init socfpga_init_irq(void) socfpga_sysmgr_init(); } -static void socfpga_cyclone5_restart(char mode, const char *cmd) +static void socfpga_cyclone5_restart(enum reboot_mode mode, const char *cmd) { u32 temp; temp = readl(rst_manager_base_addr + SOCFPGA_RSTMGR_CTRL); - if (mode == 'h') + if (mode == REBOOT_HARD) temp |= RSTMGR_CTRL_SWCOLDRSTREQ; else temp |= RSTMGR_CTRL_SWWARMRSTREQ; diff --git a/arch/arm/mach-spear/generic.h b/arch/arm/mach-spear/generic.h index a9fd453..904f2c9 100644 --- a/arch/arm/mach-spear/generic.h +++ b/arch/arm/mach-spear/generic.h @@ -16,6 +16,8 @@ #include <linux/dmaengine.h> #include <linux/amba/pl08x.h> #include <linux/init.h> +#include <linux/reboot.h> + #include <asm/mach/time.h> extern void spear13xx_timer_init(void); @@ -32,7 +34,7 @@ void __init spear6xx_clk_init(void __iomem *misc_base); void __init spear13xx_map_io(void); void __init spear13xx_l2x0_init(void); -void spear_restart(char, const char *); +void spear_restart(enum reboot_mode, const char *); void spear13xx_secondary_startup(void); void __cpuinit spear13xx_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-spear/restart.c b/arch/arm/mach-spear/restart.c index 2b44500..ce5e098 100644 --- a/arch/arm/mach-spear/restart.c +++ b/arch/arm/mach-spear/restart.c @@ -12,14 +12,15 @@ */ #include <linux/io.h> #include <linux/amba/sp810.h> +#include <linux/reboot.h> #include <asm/system_misc.h> #include <mach/spear.h> #include "generic.h" #define SPEAR13XX_SYS_SW_RES (VA_MISC_BASE + 0x204) -void spear_restart(char mode, const char *cmd) +void spear_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') { + if (mode == REBOOT_SOFT) { /* software reset, Jump into ROM at address 0 */ soft_restart(0); } else { diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 84485a1..38a3c55 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -18,6 +18,7 @@ #include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/io.h> +#include <linux/reboot.h> #include <linux/clk/sunxi.h> @@ -33,7 +34,7 @@ static void __iomem *wdt_base; -static void sun4i_restart(char mode, const char *cmd) +static void sun4i_restart(enum reboot_mode mode, const char *cmd) { if (!wdt_base) return; diff --git a/arch/arm/mach-tegra/board.h b/arch/arm/mach-tegra/board.h index 1787327..9a6659f 100644 --- a/arch/arm/mach-tegra/board.h +++ b/arch/arm/mach-tegra/board.h @@ -23,8 +23,9 @@ #define __MACH_TEGRA_BOARD_H #include <linux/types.h> +#include <linux/reboot.h> -void tegra_assert_system_reset(char mode, const char *cmd); +void tegra_assert_system_reset(enum reboot_mode mode, const char *cmd); void __init tegra_init_early(void); void __init tegra_map_common_io(void); diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index b25153e..94a119a 100644 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -22,6 +22,7 @@ #include <linux/io.h> #include <linux/clk.h> #include <linux/delay.h> +#include <linux/reboot.h> #include <linux/irqchip.h> #include <linux/clk-provider.h> @@ -68,7 +69,7 @@ void __init tegra_dt_init_irq(void) } #endif -void tegra_assert_system_reset(char mode, const char *cmd) +void tegra_assert_system_reset(enum reboot_mode mode, const char *cmd) { void __iomem *reset = IO_ADDRESS(TEGRA_PMC_BASE + 0); u32 reg; diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c index 4f7ac2a..35670b1 100644 --- a/arch/arm/mach-u300/core.c +++ b/arch/arm/mach-u300/core.c @@ -300,11 +300,11 @@ static void __init u300_init_check_chip(void) /* Forward declare this function from the watchdog */ void coh901327_watchdog_reset(void); -static void u300_restart(char mode, const char *cmd) +static void u300_restart(enum reboot_mode mode, const char *cmd) { switch (mode) { - case 's': - case 'h': + case REBOOT_SOFT: + case REBOOT_HARD: #ifdef CONFIG_COH901327_WATCHDOG coh901327_watchdog_reset(); #endif diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index 54bb80b..3b0572f 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -38,6 +38,7 @@ #include <linux/clkdev.h> #include <linux/mtd/physmap.h> #include <linux/bitops.h> +#include <linux/reboot.h> #include <asm/irq.h> #include <asm/hardware/arm_timer.h> @@ -733,7 +734,7 @@ static void versatile_leds_event(led_event_t ledevt) } #endif /* CONFIG_LEDS */ -void versatile_restart(char mode, const char *cmd) +void versatile_restart(enum reboot_mode mode, const char *cmd) { void __iomem *sys = __io_address(VERSATILE_SYS_BASE); u32 val; diff --git a/arch/arm/mach-versatile/core.h b/arch/arm/mach-versatile/core.h index 5c1b87d..f06d576 100644 --- a/arch/arm/mach-versatile/core.h +++ b/arch/arm/mach-versatile/core.h @@ -24,13 +24,14 @@ #include <linux/amba/bus.h> #include <linux/of_platform.h> +#include <linux/reboot.h> extern void __init versatile_init(void); extern void __init versatile_init_early(void); extern void __init versatile_init_irq(void); extern void __init versatile_map_io(void); extern void versatile_timer_init(void); -extern void versatile_restart(char, const char *); +extern void versatile_restart(enum reboot_mode, const char *); extern unsigned int mmc_status(struct device *dev); #ifdef CONFIG_OF extern struct of_dev_auxdata versatile_auxdata_lookup[]; diff --git a/arch/arm/mach-vt8500/vt8500.c b/arch/arm/mach-vt8500/vt8500.c index f8f2f00..eefaa60 100644 --- a/arch/arm/mach-vt8500/vt8500.c +++ b/arch/arm/mach-vt8500/vt8500.c @@ -21,6 +21,7 @@ #include <linux/clocksource.h> #include <linux/io.h> #include <linux/pm.h> +#include <linux/reboot.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> @@ -46,7 +47,7 @@ static void __iomem *pmc_base; -void vt8500_restart(char mode, const char *cmd) +void vt8500_restart(enum reboot_mode mode, const char *cmd) { if (pmc_base) writel(1, pmc_base + VT8500_PMSR_REG); diff --git a/arch/arm/mach-w90x900/cpu.c b/arch/arm/mach-w90x900/cpu.c index 9e4dd8b..b1eabaa 100644 --- a/arch/arm/mach-w90x900/cpu.c +++ b/arch/arm/mach-w90x900/cpu.c @@ -230,9 +230,9 @@ void __init nuc900_init_clocks(void) #define WTE (1 << 7) #define WTRE (1 << 1) -void nuc9xx_restart(char mode, const char *cmd) +void nuc9xx_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') { + if (mode == REBOOT_SOFT) { /* Jump into ROM at address 0 */ soft_restart(0); } else { diff --git a/arch/arm/mach-w90x900/nuc9xx.h b/arch/arm/mach-w90x900/nuc9xx.h index 88ef4b2..e3ab1e1 100644 --- a/arch/arm/mach-w90x900/nuc9xx.h +++ b/arch/arm/mach-w90x900/nuc9xx.h @@ -14,10 +14,13 @@ * published by the Free Software Foundation. * */ + +#include <linux/reboot.h> + struct map_desc; /* core initialisation functions */ extern void nuc900_init_irq(void); extern void nuc900_timer_init(void); -extern void nuc9xx_restart(char, const char *); +extern void nuc9xx_restart(enum reboot_mode, const char *); diff --git a/arch/arm/plat-iop/gpio.c b/arch/arm/plat-iop/gpio.c index e4de9be..697de6d 100644 --- a/arch/arm/plat-iop/gpio.c +++ b/arch/arm/plat-iop/gpio.c @@ -17,6 +17,7 @@ #include <linux/gpio.h> #include <linux/export.h> #include <asm/hardware/iop3xx.h> +#include <mach/gpio.h> void gpio_line_config(int line, int direction) { diff --git a/arch/arm/plat-iop/restart.c b/arch/arm/plat-iop/restart.c index 33fa699..3a4d5e5 100644 --- a/arch/arm/plat-iop/restart.c +++ b/arch/arm/plat-iop/restart.c @@ -11,7 +11,7 @@ #include <asm/system_misc.h> #include <mach/hardware.h> -void iop3xx_restart(char mode, const char *cmd) +void iop3xx_restart(enum reboot_mode mode, const char *cmd) { *IOP3XX_PCSR = 0x30; diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index 2c75bf7..8fddf46 100644 --- a/arch/metag/mm/fault.c +++ b/arch/metag/mm/fault.c @@ -224,8 +224,10 @@ do_sigbus: */ out_of_memory: up_read(&mm->mmap_sem); - if (user_mode(regs)) - do_group_exit(SIGKILL); + if (user_mode(regs)) { + pagefault_out_of_memory(); + return 1; + } no_context: /* Are we prepared to handle this kernel fault? */ diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index d48a84f..8a2e6de 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -345,9 +345,10 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - printk(KERN_ALERT "VM: killing process %s\n", tsk->comm); - if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) - do_exit(SIGKILL); + if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) { + pagefault_out_of_memory(); + return; + } goto no_context; do_sigbus: diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index e2bfafc..4a41f84 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -267,10 +267,10 @@ out_of_memory: __asm__ __volatile__("l.nop 1"); up_read(&mm->mmap_sem); - printk("VM: killing process %s\n", tsk->comm); - if (user_mode(regs)) - do_exit(SIGKILL); - goto no_context; + if (!user_mode(regs)) + goto no_context; + pagefault_out_of_memory(); + return; do_sigbus: up_read(&mm->mmap_sem); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 64f7bd5..9a0d24c 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -975,16 +975,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; hw_brk.len = 8; #ifdef CONFIG_HAVE_HW_BREAKPOINT - if (ptrace_get_breakpoints(task) < 0) - return -ESRCH; - bp = thread->ptrace_bps[0]; if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) { if (bp) { unregister_hw_breakpoint(bp); thread->ptrace_bps[0] = NULL; } - ptrace_put_breakpoints(task); return 0; } if (bp) { @@ -997,11 +993,9 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, ret = modify_user_hw_breakpoint(bp, &attr); if (ret) { - ptrace_put_breakpoints(task); return ret; } thread->ptrace_bps[0] = bp; - ptrace_put_breakpoints(task); thread->hw_brk = hw_brk; return 0; } @@ -1016,12 +1010,9 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, ptrace_triggered, NULL, task); if (IS_ERR(bp)) { thread->ptrace_bps[0] = NULL; - ptrace_put_breakpoints(task); return PTR_ERR(bp); } - ptrace_put_breakpoints(task); - #endif /* CONFIG_HAVE_HW_BREAKPOINT */ task->thread.hw_brk = hw_brk; #else /* CONFIG_PPC_ADV_DEBUG_REGS */ @@ -1440,26 +1431,19 @@ static long ppc_set_hwdebug(struct task_struct *child, if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) brk.type |= HW_BRK_TYPE_WRITE; #ifdef CONFIG_HAVE_HW_BREAKPOINT - if (ptrace_get_breakpoints(child) < 0) - return -ESRCH; - /* * Check if the request is for 'range' breakpoints. We can * support it if range < 8 bytes. */ - if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) { + if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) len = bp_info->addr2 - bp_info->addr; - } else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) + else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) len = 1; - else { - ptrace_put_breakpoints(child); + else return -EINVAL; - } bp = thread->ptrace_bps[0]; - if (bp) { - ptrace_put_breakpoints(child); + if (bp) return -ENOSPC; - } /* Create a new breakpoint request if one doesn't exist already */ hw_breakpoint_init(&attr); @@ -1471,11 +1455,9 @@ static long ppc_set_hwdebug(struct task_struct *child, ptrace_triggered, NULL, child); if (IS_ERR(bp)) { thread->ptrace_bps[0] = NULL; - ptrace_put_breakpoints(child); return PTR_ERR(bp); } - ptrace_put_breakpoints(child); return 1; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ @@ -1519,16 +1501,12 @@ static long ppc_del_hwdebug(struct task_struct *child, long data) return -EINVAL; #ifdef CONFIG_HAVE_HW_BREAKPOINT - if (ptrace_get_breakpoints(child) < 0) - return -ESRCH; - bp = thread->ptrace_bps[0]; if (bp) { unregister_hw_breakpoint(bp); thread->ptrace_bps[0] = NULL; } else ret = -ENOENT; - ptrace_put_breakpoints(child); return ret; #else /* CONFIG_HAVE_HW_BREAKPOINT */ if (child->thread.hw_brk.address == 0) diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c index 47b600e..6b18fb0 100644 --- a/arch/score/mm/fault.c +++ b/arch/score/mm/fault.c @@ -172,10 +172,10 @@ out_of_memory: down_read(&mm->mmap_sem); goto survive; } - printk("VM: killing process %s\n", tsk->comm); - if (user_mode(regs)) - do_group_exit(SIGKILL); - goto no_context; + if (!user_mode(regs)) + goto no_context; + pagefault_out_of_memory(); + return; do_sigbus: up_read(&mm->mmap_sem); diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 81f999a..668c816 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -117,11 +117,7 @@ void user_enable_single_step(struct task_struct *child) set_tsk_thread_flag(child, TIF_SINGLESTEP); - if (ptrace_get_breakpoints(child) < 0) - return; - set_single_step(child, pc); - ptrace_put_breakpoints(child); } void user_disable_single_step(struct task_struct *child) diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 3d2b81c..f7f99f9 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -573,10 +573,10 @@ out_of_memory: down_read(&mm->mmap_sem); goto survive; } - pr_alert("VM: killing process %s\n", tsk->comm); - if (!is_kernel_mode) - do_group_exit(SIGKILL); - goto no_context; + if (is_kernel_mode) + goto no_context; + pagefault_out_of_memory(); + return 0; do_sigbus: up_read(&mm->mmap_sem); diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index c944769..778ebba 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -51,16 +51,6 @@ void arch_cpu_idle(void) local_irq_enable(); } -static char reboot_mode = 'h'; - -int __init reboot_setup(char *str) -{ - reboot_mode = str[0]; - return 1; -} - -__setup("reboot=", reboot_setup); - void machine_halt(void) { gpio_set_value(GPO_SOFT_OFF, 0); @@ -88,7 +78,7 @@ void machine_restart(char *cmd) * we may need it to insert some 1:1 mappings so that * soft boot works. */ - setup_mm_for_reboot(reboot_mode); + setup_mm_for_reboot(); /* Clean and invalidate caches */ flush_cache_all(); @@ -102,7 +92,7 @@ void machine_restart(char *cmd) /* * Now handle reboot code. */ - if (reboot_mode == 's') { + if (reboot_mode == REBOOT_SOFT) { /* Jump into ROM at address 0xffff0000 */ cpu_reset(VECTORS_BASE); } else { diff --git a/arch/unicore32/kernel/setup.h b/arch/unicore32/kernel/setup.h index 30f749d..f5c51b8 100644 --- a/arch/unicore32/kernel/setup.h +++ b/arch/unicore32/kernel/setup.h @@ -22,7 +22,7 @@ extern void puv3_ps2_init(void); extern void pci_puv3_preinit(void); extern void __init puv3_init_gpio(void); -extern void setup_mm_for_reboot(char mode); +extern void setup_mm_for_reboot(void); extern char __stubs_start[], __stubs_end[]; extern char __vectors_start[], __vectors_end[]; diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c index 43c20b4..4f5a532 100644 --- a/arch/unicore32/mm/mmu.c +++ b/arch/unicore32/mm/mmu.c @@ -445,7 +445,7 @@ void __init paging_init(void) * the user-mode pages. This will then ensure that we have predictable * results when turning the mmu off */ -void setup_mm_for_reboot(char mode) +void setup_mm_for_reboot(void) { unsigned long base_pmdval; pgd_t *pgd; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 265c672..b32ebf9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -65,6 +65,7 @@ config X86 select HAVE_KERNEL_LZMA select HAVE_KERNEL_XZ select HAVE_KERNEL_LZO + select HAVE_KERNEL_LZ4 select HAVE_HW_BREAKPOINT select HAVE_MIXED_BREAKPOINTS_REGS select PERF_EVENTS diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 5ef205c..dcd90df 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -4,7 +4,8 @@ # create a compressed vmlinux image from the original vmlinux # -targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo +targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ + vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC @@ -63,12 +64,15 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE $(call if_changed,xzkern) $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzo) +$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE + $(call if_changed,lz4) suffix-$(CONFIG_KERNEL_GZIP) := gz suffix-$(CONFIG_KERNEL_BZIP2) := bz2 suffix-$(CONFIG_KERNEL_LZMA) := lzma suffix-$(CONFIG_KERNEL_XZ) := xz suffix-$(CONFIG_KERNEL_LZO) := lzo +suffix-$(CONFIG_KERNEL_LZ4) := lz4 quiet_cmd_mkpiggy = MKPIGGY $@ cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false ) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 7cb56c6..0319c88 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -145,6 +145,10 @@ static int lines, cols; #include "../../../../lib/decompress_unlzo.c" #endif +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + static void scroll(void) { int i; diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h index 75ce3f4..77a99ac 100644 --- a/arch/x86/include/asm/emergency-restart.h +++ b/arch/x86/include/asm/emergency-restart.h @@ -1,18 +1,6 @@ #ifndef _ASM_X86_EMERGENCY_RESTART_H #define _ASM_X86_EMERGENCY_RESTART_H -enum reboot_type { - BOOT_TRIPLE = 't', - BOOT_KBD = 'k', - BOOT_BIOS = 'b', - BOOT_ACPI = 'a', - BOOT_EFI = 'e', - BOOT_CF9 = 'p', - BOOT_CF9_COND = 'q', -}; - -extern enum reboot_type reboot_type; - extern void machine_emergency_restart(void); #endif /* _ASM_X86_EMERGENCY_RESTART_H */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 39cc7f7..63092af 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -25,6 +25,7 @@ #include <linux/kdebug.h> #include <linux/delay.h> #include <linux/crash_dump.h> +#include <linux/reboot.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_hub.h> @@ -36,7 +37,6 @@ #include <asm/ipi.h> #include <asm/smp.h> #include <asm/x86_init.h> -#include <asm/emergency-restart.h> #include <asm/nmi.h> /* BMC sets a bit this MMR non-zero before sending an NMI */ diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 02f0763..f66ff16 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -393,6 +393,9 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) unregister_hw_breakpoint(t->ptrace_bps[i]); t->ptrace_bps[i] = NULL; } + + t->debugreg6 = 0; + t->ptrace_dr7 = 0; } void hw_breakpoint_restore(void) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 29a8120..7461f50 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -601,30 +601,48 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[]) return dr7; } -static int -ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, - struct task_struct *tsk, int disabled) +static int ptrace_fill_bp_fields(struct perf_event_attr *attr, + int len, int type, bool disabled) +{ + int err, bp_len, bp_type; + + err = arch_bp_generic_fields(len, type, &bp_len, &bp_type); + if (!err) { + attr->bp_len = bp_len; + attr->bp_type = bp_type; + attr->disabled = disabled; + } + + return err; +} + +static struct perf_event * +ptrace_register_breakpoint(struct task_struct *tsk, int len, int type, + unsigned long addr, bool disabled) { - int err; - int gen_len, gen_type; struct perf_event_attr attr; + int err; - /* - * We should have at least an inactive breakpoint at this - * slot. It means the user is writing dr7 without having - * written the address register first - */ - if (!bp) - return -EINVAL; + ptrace_breakpoint_init(&attr); + attr.bp_addr = addr; - err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); + err = ptrace_fill_bp_fields(&attr, len, type, disabled); if (err) - return err; + return ERR_PTR(err); + + return register_user_hw_breakpoint(&attr, ptrace_triggered, + NULL, tsk); +} - attr = bp->attr; - attr.bp_len = gen_len; - attr.bp_type = gen_type; - attr.disabled = disabled; +static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, + int disabled) +{ + struct perf_event_attr attr = bp->attr; + int err; + + err = ptrace_fill_bp_fields(&attr, len, type, disabled); + if (err) + return err; return modify_user_hw_breakpoint(bp, &attr); } @@ -634,67 +652,50 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, */ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) { - struct thread_struct *thread = &(tsk->thread); + struct thread_struct *thread = &tsk->thread; unsigned long old_dr7; - int i, orig_ret = 0, rc = 0; - int enabled, second_pass = 0; - unsigned len, type; - struct perf_event *bp; - - if (ptrace_get_breakpoints(tsk) < 0) - return -ESRCH; + bool second_pass = false; + int i, rc, ret = 0; data &= ~DR_CONTROL_RESERVED; old_dr7 = ptrace_get_dr7(thread->ptrace_bps); + restore: - /* - * Loop through all the hardware breakpoints, making the - * appropriate changes to each. - */ + rc = 0; for (i = 0; i < HBP_NUM; i++) { - enabled = decode_dr7(data, i, &len, &type); - bp = thread->ptrace_bps[i]; - - if (!enabled) { - if (bp) { - /* - * Don't unregister the breakpoints right-away, - * unless all register_user_hw_breakpoint() - * requests have succeeded. This prevents - * any window of opportunity for debug - * register grabbing by other users. - */ - if (!second_pass) - continue; - - rc = ptrace_modify_breakpoint(bp, len, type, - tsk, 1); - if (rc) - break; + unsigned len, type; + bool disabled = !decode_dr7(data, i, &len, &type); + struct perf_event *bp = thread->ptrace_bps[i]; + + if (!bp) { + if (disabled) + continue; + + bp = ptrace_register_breakpoint(tsk, + len, type, 0, disabled); + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + break; } + + thread->ptrace_bps[i] = bp; continue; } - rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0); + rc = ptrace_modify_breakpoint(bp, len, type, disabled); if (rc) break; } - /* - * Make a second pass to free the remaining unused breakpoints - * or to restore the original breakpoints if an error occurred. - */ - if (!second_pass) { - second_pass = 1; - if (rc < 0) { - orig_ret = rc; - data = old_dr7; - } + + /* Restore if the first pass failed, second_pass shouldn't fail. */ + if (rc && !WARN_ON(second_pass)) { + ret = rc; + data = old_dr7; + second_pass = true; goto restore; } - ptrace_put_breakpoints(tsk); - - return ((orig_ret < 0) ? orig_ret : rc); + return ret; } /* @@ -702,25 +703,17 @@ restore: */ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) { - struct thread_struct *thread = &(tsk->thread); + struct thread_struct *thread = &tsk->thread; unsigned long val = 0; if (n < HBP_NUM) { - struct perf_event *bp; + struct perf_event *bp = thread->ptrace_bps[n]; - if (ptrace_get_breakpoints(tsk) < 0) - return -ESRCH; - - bp = thread->ptrace_bps[n]; - if (!bp) - val = 0; - else + if (bp) val = bp->hw.info.address; - - ptrace_put_breakpoints(tsk); } else if (n == 6) { val = thread->debugreg6; - } else if (n == 7) { + } else if (n == 7) { val = thread->ptrace_dr7; } return val; @@ -729,29 +722,14 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, unsigned long addr) { - struct perf_event *bp; struct thread_struct *t = &tsk->thread; - struct perf_event_attr attr; + struct perf_event *bp = t->ptrace_bps[nr]; int err = 0; - if (ptrace_get_breakpoints(tsk) < 0) - return -ESRCH; - - if (!t->ptrace_bps[nr]) { - ptrace_breakpoint_init(&attr); - /* - * Put stub len and type to register (reserve) an inactive but - * correct bp - */ - attr.bp_addr = addr; - attr.bp_len = HW_BREAKPOINT_LEN_1; - attr.bp_type = HW_BREAKPOINT_W; - attr.disabled = 1; - - bp = register_user_hw_breakpoint(&attr, ptrace_triggered, - NULL, tsk); - + if (!bp) { /* + * Put stub len and type to create an inactive but correct bp. + * * CHECKME: the previous code returned -EIO if the addr wasn't * a valid task virtual addr. The new one will return -EINVAL in * this case. @@ -760,22 +738,20 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, * writing for the user. And anyway this is the previous * behaviour. */ - if (IS_ERR(bp)) { + bp = ptrace_register_breakpoint(tsk, + X86_BREAKPOINT_LEN_1, X86_BREAKPOINT_WRITE, + addr, true); + if (IS_ERR(bp)) err = PTR_ERR(bp); - goto put; - } - - t->ptrace_bps[nr] = bp; + else + t->ptrace_bps[nr] = bp; } else { - bp = t->ptrace_bps[nr]; + struct perf_event_attr attr = bp->attr; - attr = bp->attr; attr.bp_addr = addr; err = modify_user_hw_breakpoint(bp, &attr); } -put: - ptrace_put_breakpoints(tsk); return err; } @@ -785,30 +761,20 @@ put: static int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) { - struct thread_struct *thread = &(tsk->thread); - int rc = 0; - + struct thread_struct *thread = &tsk->thread; /* There are no DR4 or DR5 registers */ - if (n == 4 || n == 5) - return -EIO; + int rc = -EIO; - if (n == 6) { - thread->debugreg6 = val; - goto ret_path; - } if (n < HBP_NUM) { rc = ptrace_set_breakpoint_addr(tsk, n, val); - if (rc) - return rc; - } - /* All that's left is DR7 */ - if (n == 7) { + } else if (n == 6) { + thread->debugreg6 = val; + rc = 0; + } else if (n == 7) { rc = ptrace_write_dr7(tsk, val); if (!rc) thread->ptrace_dr7 = val; } - -ret_path: return rc; } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 76fa1e9..563ed91 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -36,22 +36,6 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); static const struct desc_ptr no_idt = {}; -static int reboot_mode; -enum reboot_type reboot_type = BOOT_ACPI; -int reboot_force; - -/* - * This variable is used privately to keep track of whether or not - * reboot_type is still set to its default value (i.e., reboot= hasn't - * been set on the command line). This is needed so that we can - * suppress DMI scanning for reboot quirks. Without it, it's - * impossible to override a faulty reboot quirk without recompiling. - */ -static int reboot_default = 1; - -#ifdef CONFIG_SMP -static int reboot_cpu = -1; -#endif /* * This is set if we need to go through the 'emergency' path. @@ -64,79 +48,6 @@ static int reboot_emergency; bool port_cf9_safe = false; /* - * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] - * warm Don't set the cold reboot flag - * cold Set the cold reboot flag - * bios Reboot by jumping through the BIOS - * smp Reboot by executing reset on BSP or other CPU - * triple Force a triple fault (init) - * kbd Use the keyboard controller. cold reset (default) - * acpi Use the RESET_REG in the FADT - * efi Use efi reset_system runtime service - * pci Use the so-called "PCI reset register", CF9 - * force Avoid anything that could hang. - */ -static int __init reboot_setup(char *str) -{ - for (;;) { - /* - * Having anything passed on the command line via - * reboot= will cause us to disable DMI checking - * below. - */ - reboot_default = 0; - - switch (*str) { - case 'w': - reboot_mode = 0x1234; - break; - - case 'c': - reboot_mode = 0; - break; - -#ifdef CONFIG_SMP - case 's': - if (isdigit(*(str+1))) { - reboot_cpu = (int) (*(str+1) - '0'); - if (isdigit(*(str+2))) - reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0'); - } - /* - * We will leave sorting out the final value - * when we are ready to reboot, since we might not - * have detected BSP APIC ID or smp_num_cpu - */ - break; -#endif /* CONFIG_SMP */ - - case 'b': - case 'a': - case 'k': - case 't': - case 'e': - case 'p': - reboot_type = *str; - break; - - case 'f': - reboot_force = 1; - break; - } - - str = strchr(str, ','); - if (str) - str++; - else - break; - } - return 1; -} - -__setup("reboot=", reboot_setup); - - -/* * Reboot options and system auto-detection code provided by * Dell Inc. so their systems "just work". :-) */ @@ -536,6 +447,7 @@ static void native_machine_emergency_restart(void) int i; int attempt = 0; int orig_reboot_type = reboot_type; + unsigned short mode; if (reboot_emergency) emergency_vmx_disable_all(); @@ -543,7 +455,8 @@ static void native_machine_emergency_restart(void) tboot_shutdown(TB_SHUTDOWN_REBOOT); /* Tell the BIOS if we want cold or warm reboot */ - *((unsigned short *)__va(0x472)) = reboot_mode; + mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0; + *((unsigned short *)__va(0x472)) = mode; for (;;) { /* Could also try the reset bit in the Hammer NB */ @@ -585,7 +498,7 @@ static void native_machine_emergency_restart(void) case BOOT_EFI: if (efi_enabled(EFI_RUNTIME_SERVICES)) - efi.reset_system(reboot_mode ? + efi.reset_system(reboot_mode == REBOOT_WARM ? EFI_RESET_WARM : EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); @@ -614,26 +527,10 @@ void native_machine_shutdown(void) { /* Stop the cpus and apics */ #ifdef CONFIG_SMP - - /* The boot cpu is always logical cpu 0 */ - int reboot_cpu_id = 0; - - /* See if there has been given a command line override */ - if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) && - cpu_online(reboot_cpu)) - reboot_cpu_id = reboot_cpu; - - /* Make certain the cpu I'm about to reboot on is online */ - if (!cpu_online(reboot_cpu_id)) - reboot_cpu_id = smp_processor_id(); - - /* Make certain I only run on the appropriate processor */ - set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); - /* - * O.K Now that I'm on the appropriate processor, stop all of the - * others. Also disable the local irq to not receive the per-cpu - * timer interrupt which may trigger scheduler's load balance. + * Stop all of the others. Also disable the local irq to + * not receive the per-cpu timer interrupt which may trigger + * scheduler's load balance. */ local_irq_disable(); stop_other_cpus(); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 17fda6a..dfa537a 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -240,7 +240,6 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) { pud_t *pud; - unsigned long addr; int i; if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */ @@ -248,8 +247,7 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) pud = pud_offset(pgd, 0); - for (addr = i = 0; i < PREALLOCATED_PMDS; - i++, pud++, addr += PUD_SIZE) { + for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) { pmd_t *pmd = pmds[i]; if (i >= KERNEL_PGD_BOUNDARY) diff --git a/block/partitions/Kconfig b/block/partitions/Kconfig index 75a54e1..4cebb2f 100644 --- a/block/partitions/Kconfig +++ b/block/partitions/Kconfig @@ -68,6 +68,17 @@ config ACORN_PARTITION_RISCIX of machines called RISCiX. If you say 'Y' here, Linux will be able to read disks partitioned under RISCiX. +config AIX_PARTITION + bool "AIX basic partition table support" if PARTITION_ADVANCED + help + Say Y here if you would like to be able to read the hard disk + partition table format used by IBM or Motorola PowerPC machines + running AIX. AIX actually uses a Logical Volume Manager, where + "logical volumes" can be spread across one or multiple disks, + but this driver works only for the simple case of partitions which + are contiguous. + Otherwise, say N. + config OSF_PARTITION bool "Alpha OSF partition support" if PARTITION_ADVANCED default y if ALPHA diff --git a/block/partitions/Makefile b/block/partitions/Makefile index 03af8ea..2be4d7b 100644 --- a/block/partitions/Makefile +++ b/block/partitions/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_BLOCK) := check.o obj-$(CONFIG_ACORN_PARTITION) += acorn.o obj-$(CONFIG_AMIGA_PARTITION) += amiga.o obj-$(CONFIG_ATARI_PARTITION) += atari.o +obj-$(CONFIG_AIX_PARTITION) += aix.o obj-$(CONFIG_MAC_PARTITION) += mac.o obj-$(CONFIG_LDM_PARTITION) += ldm.o obj-$(CONFIG_MSDOS_PARTITION) += msdos.o diff --git a/block/partitions/aix.c b/block/partitions/aix.c new file mode 100644 index 0000000..43be471 --- /dev/null +++ b/block/partitions/aix.c @@ -0,0 +1,293 @@ +/* + * fs/partitions/aix.c + * + * Copyright (C) 2012-2013 Philippe De Muyter <phdm@macqel.be> + */ + +#include "check.h" +#include "aix.h" + +struct lvm_rec { + char lvm_id[4]; /* "_LVM" */ + char reserved4[16]; + __be32 lvmarea_len; + __be32 vgda_len; + __be32 vgda_psn[2]; + char reserved36[10]; + __be16 pp_size; /* log2(pp_size) */ + char reserved46[12]; + __be16 version; + }; + +struct vgda { + __be32 secs; + __be32 usec; + char reserved8[16]; + __be16 numlvs; + __be16 maxlvs; + __be16 pp_size; + __be16 numpvs; + __be16 total_vgdas; + __be16 vgda_size; + }; + +struct lvd { + __be16 lv_ix; + __be16 res2; + __be16 res4; + __be16 maxsize; + __be16 lv_state; + __be16 mirror; + __be16 mirror_policy; + __be16 num_lps; + __be16 res10[8]; + }; + +struct lvname { + char name[64]; + }; + +struct ppe { + __be16 lv_ix; + unsigned short res2; + unsigned short res4; + __be16 lp_ix; + unsigned short res8[12]; + }; + +struct pvd { + char reserved0[16]; + __be16 pp_count; + char reserved18[2]; + __be32 psn_part1; + char reserved24[8]; + struct ppe ppe[1016]; + }; + +#define LVM_MAXLVS 256 + +/** + * last_lba(): return number of last logical block of device + * @bdev: block device + * + * Description: Returns last LBA value on success, 0 on error. + * This is stored (by sd and ide-geometry) in + * the part[0] entry for this disk, and is the number of + * physical sectors available on the disk. + */ +static u64 last_lba(struct block_device *bdev) +{ + if (!bdev || !bdev->bd_inode) + return 0; + return (bdev->bd_inode->i_size >> 9) - 1ULL; +} + +/** + * read_lba(): Read bytes from disk, starting at given LBA + * @state + * @lba + * @buffer + * @count + * + * Description: Reads @count bytes from @state->bdev into @buffer. + * Returns number of bytes read on success, 0 on error. + */ +static size_t read_lba(struct parsed_partitions *state, u64 lba, u8 *buffer, + size_t count) +{ + size_t totalreadcount = 0; + + if (!buffer || lba + count / 512 > last_lba(state->bdev)) + return 0; + + while (count) { + int copied = 512; + Sector sect; + unsigned char *data = read_part_sector(state, lba++, §); + if (!data) + break; + if (copied > count) + copied = count; + memcpy(buffer, data, copied); + put_dev_sector(sect); + buffer += copied; + totalreadcount += copied; + count -= copied; + } + return totalreadcount; +} + +/** + * alloc_pvd(): reads physical volume descriptor + * @state + * @lba + * + * Description: Returns pvd on success, NULL on error. + * Allocates space for pvd and fill it with disk blocks at @lba + * Notes: remember to free pvd when you're done! + */ +static struct pvd *alloc_pvd(struct parsed_partitions *state, u32 lba) +{ + size_t count = sizeof(struct pvd); + struct pvd *p; + + p = kmalloc(count, GFP_KERNEL); + if (!p) + return NULL; + + if (read_lba(state, lba, (u8 *) p, count) < count) { + kfree(p); + return NULL; + } + return p; +} + +/** + * alloc_lvn(): reads logical volume names + * @state + * @lba + * + * Description: Returns lvn on success, NULL on error. + * Allocates space for lvn and fill it with disk blocks at @lba + * Notes: remember to free lvn when you're done! + */ +static struct lvname *alloc_lvn(struct parsed_partitions *state, u32 lba) +{ + size_t count = sizeof(struct lvname) * LVM_MAXLVS; + struct lvname *p; + + p = kmalloc(count, GFP_KERNEL); + if (!p) + return NULL; + + if (read_lba(state, lba, (u8 *) p, count) < count) { + kfree(p); + return NULL; + } + return p; +} + +int aix_partition(struct parsed_partitions *state) +{ + int ret = 0; + Sector sect; + unsigned char *d; + u32 pp_bytes_size; + u32 pp_blocks_size = 0; + u32 vgda_sector = 0; + u32 vgda_len = 0; + int numlvs = 0; + struct pvd *pvd; + struct lv_info { + unsigned short pps_per_lv; + unsigned short pps_found; + unsigned char lv_is_contiguous; + } *lvip; + struct lvname *n = NULL; + + d = read_part_sector(state, 7, §); + if (d) { + struct lvm_rec *p = (struct lvm_rec *)d; + u16 lvm_version = be16_to_cpu(p->version); + char tmp[64]; + + if (lvm_version == 1) { + int pp_size_log2 = be16_to_cpu(p->pp_size); + + pp_bytes_size = 1 << pp_size_log2; + pp_blocks_size = pp_bytes_size / 512; + snprintf(tmp, sizeof(tmp), + " AIX LVM header version %u found\n", + lvm_version); + vgda_len = be32_to_cpu(p->vgda_len); + vgda_sector = be32_to_cpu(p->vgda_psn[0]); + } else { + snprintf(tmp, sizeof(tmp), + " unsupported AIX LVM version %d found\n", + lvm_version); + } + strlcat(state->pp_buf, tmp, PAGE_SIZE); + put_dev_sector(sect); + } + if (vgda_sector && (d = read_part_sector(state, vgda_sector, §))) { + struct vgda *p = (struct vgda *)d; + + numlvs = be16_to_cpu(p->numlvs); + put_dev_sector(sect); + } + lvip = kzalloc(sizeof(struct lv_info) * state->limit, GFP_KERNEL); + if (!lvip) + return 0; + if (numlvs && (d = read_part_sector(state, vgda_sector + 1, §))) { + struct lvd *p = (struct lvd *)d; + int i; + + n = alloc_lvn(state, vgda_sector + vgda_len - 33); + if (n) { + int foundlvs = 0; + + for (i = 0; foundlvs < numlvs && i < state->limit; i += 1) { + lvip[i].pps_per_lv = be16_to_cpu(p[i].num_lps); + if (lvip[i].pps_per_lv) + foundlvs += 1; + } + } + put_dev_sector(sect); + } + pvd = alloc_pvd(state, vgda_sector + 17); + if (pvd) { + int numpps = be16_to_cpu(pvd->pp_count); + int psn_part1 = be32_to_cpu(pvd->psn_part1); + int i; + int cur_lv_ix = -1; + int next_lp_ix = 1; + int lp_ix; + + for (i = 0; i < numpps; i += 1) { + struct ppe *p = pvd->ppe + i; + unsigned int lv_ix; + + lp_ix = be16_to_cpu(p->lp_ix); + if (!lp_ix) { + next_lp_ix = 1; + continue; + } + lv_ix = be16_to_cpu(p->lv_ix) - 1; + if (lv_ix > state->limit) { + cur_lv_ix = -1; + continue; + } + lvip[lv_ix].pps_found += 1; + if (lp_ix == 1) { + cur_lv_ix = lv_ix; + next_lp_ix = 1; + } else if (lv_ix != cur_lv_ix || lp_ix != next_lp_ix) { + next_lp_ix = 1; + continue; + } + if (lp_ix == lvip[lv_ix].pps_per_lv) { + char tmp[70]; + + put_partition(state, lv_ix + 1, + (i + 1 - lp_ix) * pp_blocks_size + psn_part1, + lvip[lv_ix].pps_per_lv * pp_blocks_size); + snprintf(tmp, sizeof(tmp), " <%s>\n", + n[lv_ix].name); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + lvip[lv_ix].lv_is_contiguous = 1; + ret = 1; + next_lp_ix = 1; + } else + next_lp_ix += 1; + } + for (i = 0; i < state->limit; i += 1) + if (lvip[i].pps_found && !lvip[i].lv_is_contiguous) + pr_warn("partition %s (%u pp's found) is " + "not contiguous\n", + n[i].name, lvip[i].pps_found); + kfree(pvd); + } + kfree(n); + kfree(lvip); + return ret; +} diff --git a/block/partitions/aix.h b/block/partitions/aix.h new file mode 100644 index 0000000..e0c66a9 --- /dev/null +++ b/block/partitions/aix.h @@ -0,0 +1 @@ +extern int aix_partition(struct parsed_partitions *state); diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c index 7681cd2..9123f25 100644 --- a/block/partitions/msdos.c +++ b/block/partitions/msdos.c @@ -23,6 +23,7 @@ #include "check.h" #include "msdos.h" #include "efi.h" +#include "aix.h" /* * Many architectures don't like unaligned accesses, while @@ -90,7 +91,7 @@ static int aix_magic_present(struct parsed_partitions *state, unsigned char *p) if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M') ret = 1; put_dev_sector(sect); - }; + } return ret; } @@ -142,7 +143,7 @@ static void parse_extended(struct parsed_partitions *state, return; if (!msdos_magic_present(data + 510)) - goto done; + goto done; p = (struct partition *) (data + 0x1be); @@ -155,7 +156,7 @@ static void parse_extended(struct parsed_partitions *state, * and OS/2 seems to use all four entries. */ - /* + /* * First process the data partition(s) */ for (i=0; i<4; i++, p++) { @@ -263,7 +264,7 @@ static void parse_solaris_x86(struct parsed_partitions *state, } #if defined(CONFIG_BSD_DISKLABEL) -/* +/* * Create devices for BSD partitions listed in a disklabel, under a * dos-like partition. See parse_extended() for more information. */ @@ -294,7 +295,7 @@ static void parse_bsd(struct parsed_partitions *state, if (state->next == state->limit) break; - if (p->p_fstype == BSD_FS_UNUSED) + if (p->p_fstype == BSD_FS_UNUSED) continue; bsd_start = le32_to_cpu(p->p_offset); bsd_size = le32_to_cpu(p->p_size); @@ -441,7 +442,7 @@ static struct { {NEW_SOLARIS_X86_PARTITION, parse_solaris_x86}, {0, NULL}, }; - + int msdos_partition(struct parsed_partitions *state) { sector_t sector_size = bdev_logical_block_size(state->bdev) / 512; @@ -462,8 +463,12 @@ int msdos_partition(struct parsed_partitions *state) */ if (aix_magic_present(state, data)) { put_dev_sector(sect); +#ifdef CONFIG_AIX_PARTITION + return aix_partition(state); +#else strlcat(state->pp_buf, " [AIX]", PAGE_SIZE); return 0; +#endif } if (!msdos_magic_present(data + 510)) { diff --git a/crypto/Kconfig b/crypto/Kconfig index 904ffe8..69ce573 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1336,6 +1336,22 @@ config CRYPTO_842 help This is the 842 algorithm. +config CRYPTO_LZ4 + tristate "LZ4 compression algorithm" + select CRYPTO_ALGAPI + select LZ4_COMPRESS + select LZ4_DECOMPRESS + help + This is the LZ4 algorithm. + +config CRYPTO_LZ4HC + tristate "LZ4HC compression algorithm" + select CRYPTO_ALGAPI + select LZ4HC_COMPRESS + select LZ4_DECOMPRESS + help + This is the LZ4 high compression mode algorithm. + comment "Random Number Generation" config CRYPTO_ANSI_CPRNG diff --git a/crypto/Makefile b/crypto/Makefile index 62af87d..2d5ed08 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -86,6 +86,8 @@ obj-$(CONFIG_CRYPTO_CRC32) += crc32.o obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o +obj-$(CONFIG_CRYPTO_LZ4) += lz4.o +obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o obj-$(CONFIG_CRYPTO_842) += 842.o obj-$(CONFIG_CRYPTO_RNG2) += rng.o obj-$(CONFIG_CRYPTO_RNG2) += krng.o diff --git a/crypto/lz4.c b/crypto/lz4.c new file mode 100644 index 0000000..4586dd1 --- /dev/null +++ b/crypto/lz4.c @@ -0,0 +1,106 @@ +/* + * Cryptographic API. + * + * Copyright (c) 2013 Chanho Min <chanho.min@lge.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/crypto.h> +#include <linux/vmalloc.h> +#include <linux/lz4.h> + +struct lz4_ctx { + void *lz4_comp_mem; +}; + +static int lz4_init(struct crypto_tfm *tfm) +{ + struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->lz4_comp_mem = vmalloc(LZ4_MEM_COMPRESS); + if (!ctx->lz4_comp_mem) + return -ENOMEM; + + return 0; +} + +static void lz4_exit(struct crypto_tfm *tfm) +{ + struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); + vfree(ctx->lz4_comp_mem); +} + +static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); + size_t tmp_len = *dlen; + int err; + + err = lz4_compress(src, slen, dst, &tmp_len, ctx->lz4_comp_mem); + + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return 0; +} + +static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + int err; + size_t tmp_len = *dlen; + size_t __slen = slen; + + err = lz4_decompress(src, &__slen, dst, tmp_len); + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return err; +} + +static struct crypto_alg alg_lz4 = { + .cra_name = "lz4", + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct lz4_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg_lz4.cra_list), + .cra_init = lz4_init, + .cra_exit = lz4_exit, + .cra_u = { .compress = { + .coa_compress = lz4_compress_crypto, + .coa_decompress = lz4_decompress_crypto } } +}; + +static int __init lz4_mod_init(void) +{ + return crypto_register_alg(&alg_lz4); +} + +static void __exit lz4_mod_fini(void) +{ + crypto_unregister_alg(&alg_lz4); +} + +module_init(lz4_mod_init); +module_exit(lz4_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 Compression Algorithm"); diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c new file mode 100644 index 0000000..151ba31 --- /dev/null +++ b/crypto/lz4hc.c @@ -0,0 +1,106 @@ +/* + * Cryptographic API. + * + * Copyright (c) 2013 Chanho Min <chanho.min@lge.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/crypto.h> +#include <linux/vmalloc.h> +#include <linux/lz4.h> + +struct lz4hc_ctx { + void *lz4hc_comp_mem; +}; + +static int lz4hc_init(struct crypto_tfm *tfm) +{ + struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->lz4hc_comp_mem = vmalloc(LZ4HC_MEM_COMPRESS); + if (!ctx->lz4hc_comp_mem) + return -ENOMEM; + + return 0; +} + +static void lz4hc_exit(struct crypto_tfm *tfm) +{ + struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); + + vfree(ctx->lz4hc_comp_mem); +} + +static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); + size_t tmp_len = *dlen; + int err; + + err = lz4hc_compress(src, slen, dst, &tmp_len, ctx->lz4hc_comp_mem); + + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return 0; +} + +static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + int err; + size_t tmp_len = *dlen; + size_t __slen = slen; + + err = lz4_decompress(src, &__slen, dst, tmp_len); + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return err; +} + +static struct crypto_alg alg_lz4hc = { + .cra_name = "lz4hc", + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct lz4hc_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg_lz4hc.cra_list), + .cra_init = lz4hc_init, + .cra_exit = lz4hc_exit, + .cra_u = { .compress = { + .coa_compress = lz4hc_compress_crypto, + .coa_decompress = lz4hc_decompress_crypto } } +}; + +static int __init lz4hc_mod_init(void) +{ + return crypto_register_alg(&alg_lz4hc); +} + +static void __exit lz4hc_mod_fini(void) +{ + crypto_unregister_alg(&alg_lz4hc); +} + +module_init(lz4hc_mod_init); +module_exit(lz4hc_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4HC Compression Algorithm"); diff --git a/drivers/char/mwave/tp3780i.c b/drivers/char/mwave/tp3780i.c index c689697..04e6d6a 100644 --- a/drivers/char/mwave/tp3780i.c +++ b/drivers/char/mwave/tp3780i.c @@ -479,6 +479,7 @@ int tp3780I_QueryAbilities(THINKPAD_BD_DATA * pBDData, MW_ABILITIES * pAbilities PRINTK_2(TRACE_TP3780I, "tp3780i::tp3780I_QueryAbilities entry pBDData %p\n", pBDData); + memset(pAbilities, 0, sizeof(*pAbilities)); /* fill out standard constant fields */ pAbilities->instr_per_sec = pBDData->rDspSettings.uIps; pAbilities->data_size = pBDData->rDspSettings.uDStoreSize; diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 5b2b5e6..661dc3e 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1112,64 +1112,6 @@ static int sg_count(struct scatterlist *sg_list, int nbytes, bool *chained) return sg_nents; } -/** - * sg_copy_end_to_buffer - Copy end data from SG list to a linear buffer - * @sgl: The SG list - * @nents: Number of SG entries - * @buf: Where to copy to - * @buflen: The number of bytes to copy - * @skip: The number of bytes to skip before copying. - * Note: skip + buflen should equal SG total size. - * - * Returns the number of copied bytes. - * - **/ -static size_t sg_copy_end_to_buffer(struct scatterlist *sgl, unsigned int nents, - void *buf, size_t buflen, unsigned int skip) -{ - unsigned int offset = 0; - unsigned int boffset = 0; - struct sg_mapping_iter miter; - unsigned long flags; - unsigned int sg_flags = SG_MITER_ATOMIC; - size_t total_buffer = buflen + skip; - - sg_flags |= SG_MITER_FROM_SG; - - sg_miter_start(&miter, sgl, nents, sg_flags); - - local_irq_save(flags); - - while (sg_miter_next(&miter) && offset < total_buffer) { - unsigned int len; - unsigned int ignore; - - if ((offset + miter.length) > skip) { - if (offset < skip) { - /* Copy part of this segment */ - ignore = skip - offset; - len = miter.length - ignore; - if (boffset + len > buflen) - len = buflen - boffset; - memcpy(buf + boffset, miter.addr + ignore, len); - } else { - /* Copy all of this segment (up to buflen) */ - len = miter.length; - if (boffset + len > buflen) - len = buflen - boffset; - memcpy(buf + boffset, miter.addr, len); - } - boffset += len; - } - offset += miter.length; - } - - sg_miter_stop(&miter); - - local_irq_restore(flags); - return boffset; -} - /* * allocate and map the extended descriptor */ @@ -1800,7 +1742,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) if (to_hash_later) { int nents = sg_count(areq->src, nbytes, &chained); - sg_copy_end_to_buffer(areq->src, nents, + sg_pcopy_to_buffer(areq->src, nents, req_ctx->bufnext, to_hash_later, nbytes - to_hash_later); diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index c9cc08c..cc727ec 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -1017,7 +1017,7 @@ iop_adma_xor_val_self_test(struct iop_adma_device *device) struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST]; struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1]; dma_addr_t dma_srcs[IOP_ADMA_NUM_SRC_TEST + 1]; - dma_addr_t dma_addr, dest_dma; + dma_addr_t dest_dma; struct dma_async_tx_descriptor *tx; struct dma_chan *dma_chan; dma_cookie_t cookie; @@ -1516,7 +1516,7 @@ static int iop_adma_probe(struct platform_device *pdev) goto err_free_iop_chan; } - dev_info(&pdev->dev, "Intel(R) IOP: ( %s%s%s%s%s%s%s)\n", + dev_info(&pdev->dev, "Intel(R) IOP: ( %s%s%s%s%s%s)\n", dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c index 9144a6b..6ba3514 100644 --- a/drivers/iommu/msm_iommu_dev.c +++ b/drivers/iommu/msm_iommu_dev.c @@ -291,25 +291,20 @@ static int msm_iommu_ctx_probe(struct platform_device *pdev) { struct msm_iommu_ctx_dev *c = pdev->dev.platform_data; struct msm_iommu_drvdata *drvdata; - struct msm_iommu_ctx_drvdata *ctx_drvdata = NULL; + struct msm_iommu_ctx_drvdata *ctx_drvdata; int i, ret; - if (!c || !pdev->dev.parent) { - ret = -EINVAL; - goto fail; - } - drvdata = dev_get_drvdata(pdev->dev.parent); + if (!c || !pdev->dev.parent) + return -EINVAL; - if (!drvdata) { - ret = -ENODEV; - goto fail; - } + drvdata = dev_get_drvdata(pdev->dev.parent); + if (!drvdata) + return -ENODEV; ctx_drvdata = kzalloc(sizeof(*ctx_drvdata), GFP_KERNEL); - if (!ctx_drvdata) { - ret = -ENOMEM; - goto fail; - } + if (!ctx_drvdata) + return -ENOMEM; + ctx_drvdata->num = c->num; ctx_drvdata->pdev = pdev; @@ -403,6 +398,7 @@ static int __init msm_iommu_driver_init(void) ret = platform_driver_register(&msm_iommu_ctx_driver); if (ret != 0) { + platform_driver_unregister(&msm_iommu_driver); pr_err("Failed to register IOMMU context driver\n"); goto error; } diff --git a/drivers/power/reset/restart-poweroff.c b/drivers/power/reset/restart-poweroff.c index 059cd15..5758033 100644 --- a/drivers/power/reset/restart-poweroff.c +++ b/drivers/power/reset/restart-poweroff.c @@ -15,11 +15,12 @@ #include <linux/platform_device.h> #include <linux/of_platform.h> #include <linux/module.h> +#include <linux/reboot.h> #include <asm/system_misc.h> static void restart_poweroff_do_poweroff(void) { - arm_pm_restart('h', NULL); + arm_pm_restart(REBOOT_HARD, NULL); } static int restart_poweroff_probe(struct platform_device *pdev) diff --git a/drivers/power/reset/vexpress-poweroff.c b/drivers/power/reset/vexpress-poweroff.c index 469e696..476aa49 100644 --- a/drivers/power/reset/vexpress-poweroff.c +++ b/drivers/power/reset/vexpress-poweroff.c @@ -48,7 +48,7 @@ static void vexpress_power_off(void) static struct device *vexpress_restart_device; -static void vexpress_restart(char str, const char *cmd) +static void vexpress_restart(enum reboot_mode reboot_mode, const char *cmd) { vexpress_reset_do(vexpress_restart_device, "restart"); } diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c index 90a3e86..767fee2 100644 --- a/drivers/rtc/rtc-stmp3xxx.c +++ b/drivers/rtc/rtc-stmp3xxx.c @@ -261,7 +261,12 @@ static int stmp3xxx_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc_data); - stmp_reset_block(rtc_data->io); + err = stmp_reset_block(rtc_data->io); + if (err) { + dev_err(&pdev->dev, "stmp_reset_block failed: %d\n", err); + return err; + } + writel(STMP3XXX_RTC_PERSISTENT0_ALARM_EN | STMP3XXX_RTC_PERSISTENT0_ALARM_WAKE_EN | STMP3XXX_RTC_PERSISTENT0_ALARM_WAKE, diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 0a537a0..d055450 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -439,10 +439,7 @@ static int fill_from_dev_buffer(struct scsi_cmnd *scp, unsigned char *arr, act_len = sg_copy_from_buffer(sdb->table.sgl, sdb->table.nents, arr, arr_len); - if (sdb->resid) - sdb->resid -= act_len; - else - sdb->resid = scsi_bufflen(scp) - act_len; + sdb->resid = scsi_bufflen(scp) - act_len; return 0; } @@ -1693,24 +1690,48 @@ static int check_device_access_params(struct sdebug_dev_info *devi, return 0; } +/* Returns number of bytes copied or -1 if error. */ static int do_device_access(struct scsi_cmnd *scmd, struct sdebug_dev_info *devi, unsigned long long lba, unsigned int num, int write) { int ret; unsigned long long block, rest = 0; - int (*func)(struct scsi_cmnd *, unsigned char *, int); + struct scsi_data_buffer *sdb; + enum dma_data_direction dir; + size_t (*func)(struct scatterlist *, unsigned int, void *, size_t, + off_t); + + if (write) { + sdb = scsi_out(scmd); + dir = DMA_TO_DEVICE; + func = sg_pcopy_to_buffer; + } else { + sdb = scsi_in(scmd); + dir = DMA_FROM_DEVICE; + func = sg_pcopy_from_buffer; + } - func = write ? fetch_to_dev_buffer : fill_from_dev_buffer; + if (!sdb->length) + return 0; + if (!(scsi_bidi_cmnd(scmd) || scmd->sc_data_direction == dir)) + return -1; block = do_div(lba, sdebug_store_sectors); if (block + num > sdebug_store_sectors) rest = block + num - sdebug_store_sectors; - ret = func(scmd, fake_storep + (block * scsi_debug_sector_size), - (num - rest) * scsi_debug_sector_size); - if (!ret && rest) - ret = func(scmd, fake_storep, rest * scsi_debug_sector_size); + ret = func(sdb->table.sgl, sdb->table.nents, + fake_storep + (block * scsi_debug_sector_size), + (num - rest) * scsi_debug_sector_size, 0); + if (ret != (num - rest) * scsi_debug_sector_size) + return ret; + + if (rest) { + ret += func(sdb->table.sgl, sdb->table.nents, + fake_storep, rest * scsi_debug_sector_size, + (num - rest) * scsi_debug_sector_size); + } return ret; } @@ -1849,7 +1870,12 @@ static int resp_read(struct scsi_cmnd *SCpnt, unsigned long long lba, read_lock_irqsave(&atomic_rw, iflags); ret = do_device_access(SCpnt, devip, lba, num, 0); read_unlock_irqrestore(&atomic_rw, iflags); - return ret; + if (ret == -1) + return DID_ERROR << 16; + + scsi_in(SCpnt)->resid = scsi_bufflen(SCpnt) - ret; + + return 0; } void dump_sector(unsigned char *buf, int len) diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 21664fc..4241e6f 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -86,6 +86,7 @@ struct msdos_sb_info { const void *dir_ops; /* Opaque; default directory operations */ int dir_per_block; /* dir entries per block */ int dir_per_block_bits; /* log2(dir_per_block) */ + unsigned int vol_id; /*volume ID*/ int fatent_shift; struct fatent_operations *fatent_ops; diff --git a/fs/fat/file.c b/fs/fat/file.c index b0b632e..9b104f5 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -114,6 +114,12 @@ out: return err; } +static int fat_ioctl_get_volume_id(struct inode *inode, u32 __user *user_attr) +{ + struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + return put_user(sbi->vol_id, user_attr); +} + long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -124,6 +130,8 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return fat_ioctl_get_attributes(inode, user_attr); case FAT_IOCTL_SET_ATTRIBUTES: return fat_ioctl_set_attributes(filp, user_attr); + case FAT_IOCTL_GET_VOLUME_ID: + return fat_ioctl_get_volume_id(inode, user_attr); default: return -ENOTTY; /* Inappropriate ioctl for device */ } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 5d4513c..11b51bb 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1415,6 +1415,18 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, brelse(fsinfo_bh); } + /* interpret volume ID as a little endian 32 bit integer */ + if (sbi->fat_bits == 32) + sbi->vol_id = (((u32)b->fat32.vol_id[0]) | + ((u32)b->fat32.vol_id[1] << 8) | + ((u32)b->fat32.vol_id[2] << 16) | + ((u32)b->fat32.vol_id[3] << 24)); + else /* fat 16 or 12 */ + sbi->vol_id = (((u32)b->fat16.vol_id[0]) | + ((u32)b->fat16.vol_id[1] << 8) | + ((u32)b->fat16.vol_id[2] << 16) | + ((u32)b->fat16.vol_id[3] << 24)); + sbi->dir_per_block = sb->s_blocksize / sizeof(struct msdos_dir_entry); sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index a85ac4e..68851ff 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -963,7 +963,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) /* * Retrieve work items and do the writeback they describe */ -long wb_do_writeback(struct bdi_writeback *wb, int force_wait) +static long wb_do_writeback(struct bdi_writeback *wb) { struct backing_dev_info *bdi = wb->bdi; struct wb_writeback_work *work; @@ -971,12 +971,6 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) set_bit(BDI_writeback_running, &wb->bdi->state); while ((work = get_next_work_item(bdi)) != NULL) { - /* - * Override sync mode, in case we must wait for completion - * because this thread is exiting now. - */ - if (force_wait) - work->sync_mode = WB_SYNC_ALL; trace_writeback_exec(bdi, work); @@ -1025,7 +1019,7 @@ void bdi_writeback_workfn(struct work_struct *work) * rescuer as work_list needs to be drained. */ do { - pages_written = wb_do_writeback(wb, 0); + pages_written = wb_do_writeback(wb); trace_writeback_pages_written(pages_written); } while (!list_empty(&bdi->work_list)); } else { diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 2bfe6dc..1fedd5f 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -31,7 +31,6 @@ int dir_notify_enable __read_mostly = 1; static struct kmem_cache *dnotify_struct_cache __read_mostly; static struct kmem_cache *dnotify_mark_cache __read_mostly; static struct fsnotify_group *dnotify_group __read_mostly; -static DEFINE_MUTEX(dnotify_mark_mutex); /* * dnotify will attach one of these to each inode (i_fsnotify_marks) which @@ -183,7 +182,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) return; dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); - mutex_lock(&dnotify_mark_mutex); + mutex_lock(&dnotify_group->mark_mutex); spin_lock(&fsn_mark->lock); prev = &dn_mark->dn; @@ -199,11 +198,12 @@ void dnotify_flush(struct file *filp, fl_owner_t id) spin_unlock(&fsn_mark->lock); - /* nothing else could have found us thanks to the dnotify_mark_mutex */ + /* nothing else could have found us thanks to the dnotify_groups + mark_mutex */ if (dn_mark->dn == NULL) - fsnotify_destroy_mark(fsn_mark, dnotify_group); + fsnotify_destroy_mark_locked(fsn_mark, dnotify_group); - mutex_unlock(&dnotify_mark_mutex); + mutex_unlock(&dnotify_group->mark_mutex); fsnotify_put_mark(fsn_mark); } @@ -326,7 +326,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) new_dn_mark->dn = NULL; /* this is needed to prevent the fcntl/close race described below */ - mutex_lock(&dnotify_mark_mutex); + mutex_lock(&dnotify_group->mark_mutex); /* add the new_fsn_mark or find an old one. */ fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode); @@ -334,7 +334,8 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); spin_lock(&fsn_mark->lock); } else { - fsnotify_add_mark(new_fsn_mark, dnotify_group, inode, NULL, 0); + fsnotify_add_mark_locked(new_fsn_mark, dnotify_group, inode, + NULL, 0); spin_lock(&new_fsn_mark->lock); fsn_mark = new_fsn_mark; dn_mark = new_dn_mark; @@ -348,9 +349,9 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) /* if (f != filp) means that we lost a race and another task/thread * actually closed the fd we are still playing with before we grabbed - * the dnotify_mark_mutex and fsn_mark->lock. Since closing the fd is the - * only time we clean up the marks we need to get our mark off - * the list. */ + * the dnotify_groups mark_mutex and fsn_mark->lock. Since closing the + * fd is the only time we clean up the marks we need to get our mark + * off the list. */ if (f != filp) { /* if we added ourselves, shoot ourselves, it's possible that * the flush actually did shoot this fsn_mark. That's fine too @@ -385,9 +386,9 @@ out: spin_unlock(&fsn_mark->lock); if (destroy) - fsnotify_destroy_mark(fsn_mark, dnotify_group); + fsnotify_destroy_mark_locked(fsn_mark, dnotify_group); - mutex_unlock(&dnotify_mark_mutex); + mutex_unlock(&dnotify_group->mark_mutex); fsnotify_put_mark(fsn_mark); out_err: if (new_fsn_mark) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 1ea52f7..e44cb64 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -122,6 +122,7 @@ static int fill_event_metadata(struct fsnotify_group *group, metadata->event_len = FAN_EVENT_METADATA_LEN; metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; + metadata->reserved = 0; metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; metadata->pid = pid_vnr(event->tgid); if (unlikely(event->mask & FAN_Q_OVERFLOW)) @@ -523,14 +524,18 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, __u32 removed; int destroy_mark; + mutex_lock(&group->mark_mutex); fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); - if (!fsn_mark) + if (!fsn_mark) { + mutex_unlock(&group->mark_mutex); return -ENOENT; + } removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, &destroy_mark); if (destroy_mark) - fsnotify_destroy_mark(fsn_mark, group); + fsnotify_destroy_mark_locked(fsn_mark, group); + mutex_unlock(&group->mark_mutex); fsnotify_put_mark(fsn_mark); if (removed & real_mount(mnt)->mnt_fsnotify_mask) @@ -547,14 +552,19 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, __u32 removed; int destroy_mark; + mutex_lock(&group->mark_mutex); fsn_mark = fsnotify_find_inode_mark(group, inode); - if (!fsn_mark) + if (!fsn_mark) { + mutex_unlock(&group->mark_mutex); return -ENOENT; + } removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, &destroy_mark); if (destroy_mark) - fsnotify_destroy_mark(fsn_mark, group); + fsnotify_destroy_mark_locked(fsn_mark, group); + mutex_unlock(&group->mark_mutex); + /* matches the fsnotify_find_inode_mark() */ fsnotify_put_mark(fsn_mark); if (removed & inode->i_fsnotify_mask) @@ -590,35 +600,55 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, return mask & ~oldmask; } +static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, + struct inode *inode, + struct vfsmount *mnt) +{ + struct fsnotify_mark *mark; + int ret; + + if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) + return ERR_PTR(-ENOSPC); + + mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); + if (!mark) + return ERR_PTR(-ENOMEM); + + fsnotify_init_mark(mark, fanotify_free_mark); + ret = fsnotify_add_mark_locked(mark, group, inode, mnt, 0); + if (ret) { + fsnotify_put_mark(mark); + return ERR_PTR(ret); + } + + return mark; +} + + static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt, __u32 mask, unsigned int flags) { struct fsnotify_mark *fsn_mark; __u32 added; - int ret = 0; + mutex_lock(&group->mark_mutex); fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); if (!fsn_mark) { - if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) - return -ENOSPC; - - fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); - if (!fsn_mark) - return -ENOMEM; - - fsnotify_init_mark(fsn_mark, fanotify_free_mark); - ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0); - if (ret) - goto err; + fsn_mark = fanotify_add_new_mark(group, NULL, mnt); + if (IS_ERR(fsn_mark)) { + mutex_unlock(&group->mark_mutex); + return PTR_ERR(fsn_mark); + } } added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); + mutex_unlock(&group->mark_mutex); if (added & ~real_mount(mnt)->mnt_fsnotify_mask) fsnotify_recalc_vfsmount_mask(mnt); -err: + fsnotify_put_mark(fsn_mark); - return ret; + return 0; } static int fanotify_add_inode_mark(struct fsnotify_group *group, @@ -627,7 +657,6 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, { struct fsnotify_mark *fsn_mark; __u32 added; - int ret = 0; pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); @@ -641,27 +670,23 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, (atomic_read(&inode->i_writecount) > 0)) return 0; + mutex_lock(&group->mark_mutex); fsn_mark = fsnotify_find_inode_mark(group, inode); if (!fsn_mark) { - if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) - return -ENOSPC; - - fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); - if (!fsn_mark) - return -ENOMEM; - - fsnotify_init_mark(fsn_mark, fanotify_free_mark); - ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0); - if (ret) - goto err; + fsn_mark = fanotify_add_new_mark(group, inode, NULL); + if (IS_ERR(fsn_mark)) { + mutex_unlock(&group->mark_mutex); + return PTR_ERR(fsn_mark); + } } added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); + mutex_unlock(&group->mark_mutex); if (added & ~inode->i_fsnotify_mask) fsnotify_recalc_inode_mask(inode); -err: + fsnotify_put_mark(fsn_mark); - return ret; + return 0; } /* fanotify syscalls */ diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 959815c..60f954a 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -636,7 +636,8 @@ static int inotify_new_watch(struct fsnotify_group *group, goto out_err; /* we are on the idr, now get on the inode */ - ret = fsnotify_add_mark(&tmp_i_mark->fsn_mark, group, inode, NULL, 0); + ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode, + NULL, 0); if (ret) { /* we failed to get on the inode, get off the idr */ inotify_remove_from_idr(group, tmp_i_mark); @@ -660,19 +661,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod { int ret = 0; -retry: + mutex_lock(&group->mark_mutex); /* try to update and existing watch with the new arg */ ret = inotify_update_existing_watch(group, inode, arg); /* no mark present, try to add a new one */ if (ret == -ENOENT) ret = inotify_new_watch(group, inode, arg); - /* - * inotify_new_watch could race with another thread which did an - * inotify_new_watch between the update_existing and the add watch - * here, go back and try to update an existing mark again. - */ - if (ret == -EEXIST) - goto retry; + mutex_unlock(&group->mark_mutex); return ret; } diff --git a/fs/notify/mark.c b/fs/notify/mark.c index fc6b49b..923fe4a 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -20,28 +20,29 @@ * fsnotify inode mark locking/lifetime/and refcnting * * REFCNT: - * The mark->refcnt tells how many "things" in the kernel currently are - * referencing this object. The object typically will live inside the kernel - * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task - * which can find this object holding the appropriete locks, can take a reference - * and the object itself is guaranteed to survive until the reference is dropped. + * The group->recnt and mark->refcnt tell how many "things" in the kernel + * currently are referencing the objects. Both kind of objects typically will + * live inside the kernel with a refcnt of 2, one for its creation and one for + * the reference a group and a mark hold to each other. + * If you are holding the appropriate locks, you can take a reference and the + * object itself is guaranteed to survive until the reference is dropped. * * LOCKING: - * There are 3 spinlocks involved with fsnotify inode marks and they MUST - * be taken in order as follows: + * There are 3 locks involved with fsnotify inode marks and they MUST be taken + * in order as follows: * + * group->mark_mutex * mark->lock - * group->mark_lock * inode->i_lock * - * mark->lock protects 2 things, mark->group and mark->inode. You must hold - * that lock to dereference either of these things (they could be NULL even with - * the lock) - * - * group->mark_lock protects the marks_list anchored inside a given group - * and each mark is hooked via the g_list. It also sorta protects the - * free_g_list, which when used is anchored by a private list on the stack of the - * task which held the group->mark_lock. + * group->mark_mutex protects the marks_list anchored inside a given group and + * each mark is hooked via the g_list. It also protects the groups private + * data (i.e group limits). + + * mark->lock protects the marks attributes like its masks and flags. + * Furthermore it protects the access to a reference of the group that the mark + * is assigned to as well as the access to a reference of the inode/vfsmount + * that is being watched by the mark. * * inode->i_lock protects the i_fsnotify_marks list anchored inside a * given inode and each mark is hooked via the i_list. (and sorta the @@ -64,18 +65,11 @@ * inode. We take i_lock and walk the i_fsnotify_marks safely. For each * mark on the list we take a reference (so the mark can't disappear under us). * We remove that mark form the inode's list of marks and we add this mark to a - * private list anchored on the stack using i_free_list; At this point we no - * longer fear anything finding the mark using the inode's list of marks. - * - * We can safely and locklessly run the private list on the stack of everything - * we just unattached from the original inode. For each mark on the private list - * we grab the mark-> and can thus dereference mark->group and mark->inode. If - * we see the group and inode are not NULL we take those locks. Now holding all - * 3 locks we can completely remove the mark from other tasks finding it in the - * future. Remember, 10 things might already be referencing this mark, but they - * better be holding a ref. We drop our reference we took before we unhooked it - * from the inode. When the ref hits 0 we can free the mark. - * + * private list anchored on the stack using i_free_list; we walk i_free_list + * and before we destroy the mark we make sure that we dont race with a + * concurrent destroy_group by getting a ref to the marks group and taking the + * groups mutex. + * Very similarly for freeing by group, except we use free_g_list. * * This has the very interesting property of being able to run concurrently with diff --git a/include/linux/audit.h b/include/linux/audit.h index b20b038..729a4d1 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -103,8 +103,11 @@ extern void __audit_syscall_exit(int ret_success, long ret_value); extern struct filename *__audit_reusename(const __user char *uptr); extern void __audit_getname(struct filename *name); extern void audit_putname(struct filename *name); + +#define AUDIT_INODE_PARENT 1 /* dentry represents the parent */ +#define AUDIT_INODE_HIDDEN 2 /* audit record should be hidden */ extern void __audit_inode(struct filename *name, const struct dentry *dentry, - unsigned int parent); + unsigned int flags); extern void __audit_inode_child(const struct inode *parent, const struct dentry *dentry, const unsigned char type); @@ -148,10 +151,22 @@ static inline void audit_getname(struct filename *name) if (unlikely(!audit_dummy_context())) __audit_getname(name); } -static inline void audit_inode(struct filename *name, const struct dentry *dentry, +static inline void audit_inode(struct filename *name, + const struct dentry *dentry, unsigned int parent) { + if (unlikely(!audit_dummy_context())) { + unsigned int flags = 0; + if (parent) + flags |= AUDIT_INODE_PARENT; + __audit_inode(name, dentry, flags); + } +} +static inline void audit_inode_parent_hidden(struct filename *name, + const struct dentry *dentry) +{ if (unlikely(!audit_dummy_context())) - __audit_inode(name, dentry, parent); + __audit_inode(name, dentry, + AUDIT_INODE_PARENT | AUDIT_INODE_HIDDEN); } static inline void audit_inode_child(const struct inode *parent, const struct dentry *dentry, @@ -311,7 +326,7 @@ static inline void audit_putname(struct filename *name) { } static inline void __audit_inode(struct filename *name, const struct dentry *dentry, - unsigned int parent) + unsigned int flags) { } static inline void __audit_inode_child(const struct inode *parent, const struct dentry *dentry, @@ -321,6 +336,9 @@ static inline void audit_inode(struct filename *name, const struct dentry *dentry, unsigned int parent) { } +static inline void audit_inode_parent_hidden(struct filename *name, + const struct dentry *dentry) +{ } static inline void audit_inode_child(const struct inode *parent, const struct dentry *dentry, const unsigned char type) diff --git a/include/linux/decompress/unlz4.h b/include/linux/decompress/unlz4.h new file mode 100644 index 0000000..d5b68bf --- /dev/null +++ b/include/linux/decompress/unlz4.h @@ -0,0 +1,10 @@ +#ifndef DECOMPRESS_UNLZ4_H +#define DECOMPRESS_UNLZ4_H + +int unlz4(unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *pos, + void(*error)(char *x)); +#endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 834c9e5..a35b10e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -372,8 +372,8 @@ struct address_space_operations { int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **, unsigned long *); /* - * migrate the contents of a page to the specified target. If sync - * is false, it must not block. + * migrate the contents of a page to the specified target. If + * migrate_mode is MIGRATE_ASYNC, it must not block. */ int (*migratepage) (struct address_space *, struct page *, struct page *, enum migrate_mode); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0f615eb..9b4dd49 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -209,7 +209,7 @@ static inline int allocflags_to_migratetype(gfp_t gfp_flags) * 0x9 => DMA or NORMAL (MOVABLE+DMA) * 0xa => MOVABLE (Movable is valid only if HIGHMEM is set too) * 0xb => BAD (MOVABLE+HIGHMEM+DMA) - * 0xc => DMA32 (MOVABLE+HIGHMEM+DMA32) + * 0xc => DMA32 (MOVABLE+DMA32) * 0xd => BAD (MOVABLE+DMA32+DMA) * 0xe => BAD (MOVABLE+DMA32+HIGHMEM) * 0xf => BAD (MOVABLE+DMA32+HIGHMEM+DMA) diff --git a/include/linux/lz4.h b/include/linux/lz4.h new file mode 100644 index 0000000..d21c13f --- /dev/null +++ b/include/linux/lz4.h @@ -0,0 +1,87 @@ +#ifndef __LZ4_H__ +#define __LZ4_H__ +/* + * LZ4 Kernel Interface + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define LZ4_MEM_COMPRESS (4096 * sizeof(unsigned char *)) +#define LZ4HC_MEM_COMPRESS (65538 * sizeof(unsigned char *)) + +/* + * lz4_compressbound() + * Provides the maximum size that LZ4 may output in a "worst case" scenario + * (input data not compressible) + */ +static inline size_t lz4_compressbound(size_t isize) +{ + return isize + (isize / 255) + 16; +} + +/* + * lz4_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + + /* + * lz4hc_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4HC_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + +/* + * lz4_decompress() + * src : source address of the compressed data + * src_len : is the input size, whcih is returned after decompress done + * dest : output buffer address of the decompressed data + * actual_dest_len: is the size of uncompressed data, supposing it's known + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. + * slightly faster than lz4_decompress_unknownoutputsize() + */ +int lz4_decompress(const char *src, size_t *src_len, char *dest, + size_t actual_dest_len); + +/* + * lz4_decompress_unknownoutputsize() + * src : source address of the compressed data + * src_len : is the input size, therefore the compressed size + * dest : output buffer address of the decompressed data + * dest_len: is the max size of the destination buffer, which is + * returned with actual size of decompressed data after + * decompress done + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. + */ +int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, + char *dest, size_t *dest_len); +#endif diff --git a/include/linux/mm.h b/include/linux/mm.h index b87681a..f022460 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -151,12 +151,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) #endif -#define VM_READHINTMASK (VM_SEQ_READ | VM_RAND_READ) -#define VM_ClearReadHint(v) (v)->vm_flags &= ~VM_READHINTMASK -#define VM_NormalReadHint(v) (!((v)->vm_flags & VM_READHINTMASK)) -#define VM_SequentialReadHint(v) ((v)->vm_flags & VM_SEQ_READ) -#define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) - /* * Special vmas that are non-mergable, non-mlock()able. * Note: mm/huge_memory.c VM_NO_THP depends on this definition. diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ae19af5..af4a3b7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -869,11 +869,6 @@ static inline int is_highmem_idx(enum zone_type idx) #endif } -static inline int is_normal_idx(enum zone_type idx) -{ - return (idx == ZONE_NORMAL); -} - /** * is_highmem - helper function to quickly check if a struct zone is a * highmem zone or not. This is an attempt to keep references @@ -892,29 +887,6 @@ static inline int is_highmem(struct zone *zone) #endif } -static inline int is_normal(struct zone *zone) -{ - return zone == zone->zone_pgdat->node_zones + ZONE_NORMAL; -} - -static inline int is_dma32(struct zone *zone) -{ -#ifdef CONFIG_ZONE_DMA32 - return zone == zone->zone_pgdat->node_zones + ZONE_DMA32; -#else - return 0; -#endif -} - -static inline int is_dma(struct zone *zone) -{ -#ifdef CONFIG_ZONE_DMA - return zone == zone->zone_pgdat->node_zones + ZONE_DMA; -#else - return 0; -#endif -} - /* These two functions are used to setup the per zone pages min values */ struct ctl_table; int min_free_kbytes_sysctl_handler(struct ctl_table *, int, diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 89573a3..07d0df6 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -142,9 +142,6 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) { INIT_LIST_HEAD(&child->ptrace_entry); INIT_LIST_HEAD(&child->ptraced); -#ifdef CONFIG_HAVE_HW_BREAKPOINT - atomic_set(&child->ptrace_bp_refcnt, 1); -#endif child->jobctl = 0; child->ptrace = 0; child->parent = child->real_parent; @@ -351,11 +348,4 @@ extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); -#ifdef CONFIG_HAVE_HW_BREAKPOINT -extern int ptrace_get_breakpoints(struct task_struct *tsk); -extern void ptrace_put_breakpoints(struct task_struct *tsk); -#else -static inline void ptrace_put_breakpoints(struct task_struct *tsk) { } -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ - #endif diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 23b3630..8e00f9f 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -10,6 +10,31 @@ #define SYS_HALT 0x0002 /* Notify of system halt */ #define SYS_POWER_OFF 0x0003 /* Notify of system power off */ +enum reboot_mode { + REBOOT_COLD = 0, + REBOOT_WARM, + REBOOT_HARD, + REBOOT_SOFT, + REBOOT_GPIO, +}; +extern enum reboot_mode reboot_mode; + +enum reboot_type { + BOOT_TRIPLE = 't', + BOOT_KBD = 'k', + BOOT_BIOS = 'b', + BOOT_ACPI = 'a', + BOOT_EFI = 'e', + BOOT_CF9 = 'p', + BOOT_CF9_COND = 'q', +}; +extern enum reboot_type reboot_type; + +extern int reboot_default; +extern int reboot_cpu; +extern int reboot_force; + + extern int register_reboot_notifier(struct notifier_block *); extern int unregister_reboot_notifier(struct notifier_block *); @@ -26,7 +51,7 @@ extern void machine_shutdown(void); struct pt_regs; extern void machine_crash_shutdown(struct pt_regs *); -/* +/* * Architecture independent implemenations of sys_reboot commands. */ diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 2680677..adae88f 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -244,6 +244,11 @@ size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents, size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen); +size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents, + void *buf, size_t buflen, off_t skip); +size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents, + void *buf, size_t buflen, off_t skip); + /* * Maximum number of entries that will be allocated in one piece, if * a list larger than this is required then chaining will be utilized. diff --git a/include/linux/sched.h b/include/linux/sched.h index cdd5407..75324d8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1401,9 +1401,6 @@ struct task_struct { } memcg_batch; unsigned int memcg_kmem_skip_account; #endif -#ifdef CONFIG_HAVE_HW_BREAKPOINT - atomic_t ptrace_bp_refcnt; -#endif #ifdef CONFIG_UPROBES struct uprobe_task *utask; #endif diff --git a/include/linux/sem.h b/include/linux/sem.h index 53d4265..976ce3a 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -12,10 +12,12 @@ struct task_struct; struct sem_array { struct kern_ipc_perm ____cacheline_aligned_in_smp sem_perm; /* permissions .. see ipc.h */ - time_t sem_otime; /* last semop time */ time_t sem_ctime; /* last change time */ struct sem *sem_base; /* ptr to first semaphore in array */ - struct list_head sem_pending; /* pending operations to be processed */ + struct list_head pending_alter; /* pending operations */ + /* that alter the array */ + struct list_head pending_const; /* pending complex operations */ + /* that do not alter semvals */ struct list_head list_id; /* undo requests on this array */ int sem_nsems; /* no. of semaphores in array */ int complex_count; /* pending complex operations */ diff --git a/include/linux/vexpress.h b/include/linux/vexpress.h index ea7168a..617c01b 100644 --- a/include/linux/vexpress.h +++ b/include/linux/vexpress.h @@ -15,6 +15,7 @@ #define _LINUX_VEXPRESS_H #include <linux/device.h> +#include <linux/reboot.h> #define VEXPRESS_SITE_MB 0 #define VEXPRESS_SITE_DB1 1 diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index dd0a2c8..4b8a891 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -10,12 +10,12 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ /* bits in flags of vmalloc's vm_struct below */ -#define VM_IOREMAP 0x00000001 /* ioremap() and friends */ -#define VM_ALLOC 0x00000002 /* vmalloc() */ -#define VM_MAP 0x00000004 /* vmap()ed pages */ -#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ -#define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ -#define VM_UNLIST 0x00000020 /* vm_struct is not listed in vmlist */ +#define VM_IOREMAP 0x00000001 /* ioremap() and friends */ +#define VM_ALLOC 0x00000002 /* vmalloc() */ +#define VM_MAP 0x00000004 /* vmap()ed pages */ +#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ +#define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ +#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ /* bits [20..32] reserved for arch specific ioremap internals */ /* diff --git a/include/linux/writeback.h b/include/linux/writeback.h index abfe117..4e198ca 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -47,11 +47,16 @@ enum wb_reason { WB_REASON_LAPTOP_TIMER, WB_REASON_FREE_MORE_MEM, WB_REASON_FS_FREE_SPACE, + /* + * There is no bdi forker thread any more and works are done + * by emergency worker, however, this is TPs userland visible + * and we'll be exposing exactly the same information, + * so it has a mismatch name. + */ WB_REASON_FORKER_THREAD, WB_REASON_MAX, }; -extern const char *wb_reason_name[]; /* * A control structure which tells the writeback code what to do. These are @@ -95,7 +100,6 @@ int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, void sync_inodes_sb(struct super_block *); long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, enum wb_reason reason); -long wb_do_writeback(struct bdi_writeback *wb, int force_wait); void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); void inode_wait_for_writeback(struct inode *inode); diff --git a/include/uapi/linux/msdos_fs.h b/include/uapi/linux/msdos_fs.h index f055e58..e284ff9 100644 --- a/include/uapi/linux/msdos_fs.h +++ b/include/uapi/linux/msdos_fs.h @@ -104,6 +104,8 @@ struct __fat_dirent { /* <linux/videotext.h> has used 0x72 ('r') in collision, so skip a few */ #define FAT_IOCTL_GET_ATTRIBUTES _IOR('r', 0x10, __u32) #define FAT_IOCTL_SET_ATTRIBUTES _IOW('r', 0x11, __u32) +/*Android kernel has used 0x12, so we use 0x13*/ +#define FAT_IOCTL_GET_VOLUME_ID _IOR('r', 0x13, __u32) struct fat_boot_sector { __u8 ignored[3]; /* Boot strap short or near jump */ @@ -128,6 +130,10 @@ struct fat_boot_sector { __u8 drive_number; /* Physical drive number */ __u8 state; /* undocumented, but used for mount state. */ + __u8 signature; /* extended boot signature */ + __u8 vol_id[4]; /* volume ID */ + __u8 vol_label[11]; /* volume label */ + __u8 fs_type[8]; /* file system type */ /* other fiealds are not added here */ } fat16; @@ -147,6 +153,10 @@ struct fat_boot_sector { __u8 drive_number; /* Physical drive number */ __u8 state; /* undocumented, but used for mount state. */ + __u8 signature; /* extended boot signature */ + __u8 vol_id[4]; /* volume ID */ + __u8 vol_label[11]; /* volume label */ + __u8 fs_type[8]; /* file system type */ /* other fiealds are not added here */ } fat32; }; diff --git a/init/Kconfig b/init/Kconfig index ea1be00..54d3fa5 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -112,10 +112,13 @@ config HAVE_KERNEL_XZ config HAVE_KERNEL_LZO bool +config HAVE_KERNEL_LZ4 + bool + choice prompt "Kernel compression mode" default KERNEL_GZIP - depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO + depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 help The linux kernel is a kind of self-extracting executable. Several compression algorithms are available, which differ @@ -182,6 +185,18 @@ config KERNEL_LZO size is about 10% bigger than gzip; however its speed (both compression and decompression) is the fastest. +config KERNEL_LZ4 + bool "LZ4" + depends on HAVE_KERNEL_LZ4 + help + LZ4 is an LZ77-type compressor with a fixed, byte-oriented encoding. + A preliminary version of LZ4 de/compression tool is available at + <https://code.google.com/p/lz4/>. + + Its compression ratio is worse than LZO. The size of the kernel + is about 8% bigger than LZO. But the decompression speed is + faster than LZO. + endchoice config DEFAULT_HOSTNAME diff --git a/ipc/mqueue.c b/ipc/mqueue.c index e4e47f6..ae1996d 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -823,6 +823,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, error = ro; goto out; } + audit_inode_parent_hidden(name, root); filp = do_create(ipc_ns, root->d_inode, &path, oflag, mode, u_attr ? &attr : NULL); @@ -868,6 +869,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) if (IS_ERR(name)) return PTR_ERR(name); + audit_inode_parent_hidden(name, mnt->mnt_root); err = mnt_want_write(mnt); if (err) goto out_name; @@ -141,27 +141,23 @@ void __init msg_init(void) IPC_MSG_IDS, sysvipc_msg_proc_show); } -/* - * msg_lock_(check_) routines are called in the paths where the rw_mutex - * is not held. - */ -static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id) +static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id) { - struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id); + struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id); if (IS_ERR(ipcp)) - return (struct msg_queue *)ipcp; + return ERR_CAST(ipcp); return container_of(ipcp, struct msg_queue, q_perm); } -static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns, - int id) +static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns, + int id) { - struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id); + struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id); if (IS_ERR(ipcp)) - return (struct msg_queue *)ipcp; + return ERR_CAST(ipcp); return container_of(ipcp, struct msg_queue, q_perm); } @@ -199,9 +195,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) return retval; } - /* - * ipc_addid() locks msq - */ + /* ipc_addid() locks msq upon success. */ id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); if (id < 0) { security_msg_queue_free(msq); @@ -218,7 +212,8 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) INIT_LIST_HEAD(&msq->q_receivers); INIT_LIST_HEAD(&msq->q_senders); - msg_unlock(msq); + ipc_unlock_object(&msq->q_perm); + rcu_read_unlock(); return msq->q_perm.id; } @@ -408,31 +403,39 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, return -EFAULT; } - ipcp = ipcctl_pre_down(ns, &msg_ids(ns), msqid, cmd, - &msqid64.msg_perm, msqid64.msg_qbytes); - if (IS_ERR(ipcp)) - return PTR_ERR(ipcp); + down_write(&msg_ids(ns).rw_mutex); + rcu_read_lock(); + + ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd, + &msqid64.msg_perm, msqid64.msg_qbytes); + if (IS_ERR(ipcp)) { + err = PTR_ERR(ipcp); + goto out_unlock1; + } msq = container_of(ipcp, struct msg_queue, q_perm); err = security_msg_queue_msgctl(msq, cmd); if (err) - goto out_unlock; + goto out_unlock1; switch (cmd) { case IPC_RMID: + ipc_lock_object(&msq->q_perm); + /* freeque unlocks the ipc object and rcu */ freeque(ns, ipcp); goto out_up; case IPC_SET: if (msqid64.msg_qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE)) { err = -EPERM; - goto out_unlock; + goto out_unlock1; } + ipc_lock_object(&msq->q_perm); err = ipc_update_perm(&msqid64.msg_perm, ipcp); if (err) - goto out_unlock; + goto out_unlock0; msq->q_qbytes = msqid64.msg_qbytes; @@ -448,25 +451,23 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, break; default: err = -EINVAL; + goto out_unlock1; } -out_unlock: - msg_unlock(msq); + +out_unlock0: + ipc_unlock_object(&msq->q_perm); +out_unlock1: + rcu_read_unlock(); out_up: up_write(&msg_ids(ns).rw_mutex); return err; } -SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) +static int msgctl_nolock(struct ipc_namespace *ns, int msqid, + int cmd, int version, void __user *buf) { + int err; struct msg_queue *msq; - int err, version; - struct ipc_namespace *ns; - - if (msqid < 0 || cmd < 0) - return -EINVAL; - - version = ipc_parse_version(&cmd); - ns = current->nsproxy->ipc_ns; switch (cmd) { case IPC_INFO: @@ -477,6 +478,7 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) if (!buf) return -EFAULT; + /* * We must not return kernel stack data. * due to padding, it's not enough @@ -508,7 +510,8 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) return -EFAULT; return (max_id < 0) ? 0 : max_id; } - case MSG_STAT: /* msqid is an index rather than a msg queue id */ + + case MSG_STAT: case IPC_STAT: { struct msqid64_ds tbuf; @@ -517,17 +520,25 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) if (!buf) return -EFAULT; + memset(&tbuf, 0, sizeof(tbuf)); + + rcu_read_lock(); if (cmd == MSG_STAT) { - msq = msg_lock(ns, msqid); - if (IS_ERR(msq)) - return PTR_ERR(msq); + msq = msq_obtain_object(ns, msqid); + if (IS_ERR(msq)) { + err = PTR_ERR(msq); + goto out_unlock; + } success_return = msq->q_perm.id; } else { - msq = msg_lock_check(ns, msqid); - if (IS_ERR(msq)) - return PTR_ERR(msq); + msq = msq_obtain_object_check(ns, msqid); + if (IS_ERR(msq)) { + err = PTR_ERR(msq); + goto out_unlock; + } success_return = 0; } + err = -EACCES; if (ipcperms(ns, &msq->q_perm, S_IRUGO)) goto out_unlock; @@ -536,8 +547,6 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) if (err) goto out_unlock; - memset(&tbuf, 0, sizeof(tbuf)); - kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); tbuf.msg_stime = msq->q_stime; tbuf.msg_rtime = msq->q_rtime; @@ -547,24 +556,48 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) tbuf.msg_qbytes = msq->q_qbytes; tbuf.msg_lspid = msq->q_lspid; tbuf.msg_lrpid = msq->q_lrpid; - msg_unlock(msq); + rcu_read_unlock(); + if (copy_msqid_to_user(buf, &tbuf, version)) return -EFAULT; return success_return; } - case IPC_SET: - case IPC_RMID: - err = msgctl_down(ns, msqid, cmd, buf, version); - return err; + default: - return -EINVAL; + return -EINVAL; } + return err; out_unlock: - msg_unlock(msq); + rcu_read_unlock(); return err; } +SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) +{ + int version; + struct ipc_namespace *ns; + + if (msqid < 0 || cmd < 0) + return -EINVAL; + + version = ipc_parse_version(&cmd); + ns = current->nsproxy->ipc_ns; + + switch (cmd) { + case IPC_INFO: + case MSG_INFO: + case MSG_STAT: /* msqid is an index rather than a msg queue id */ + case IPC_STAT: + return msgctl_nolock(ns, msqid, cmd, version, buf); + case IPC_SET: + case IPC_RMID: + return msgctl_down(ns, msqid, cmd, buf, version); + default: + return -EINVAL; + } +} + static int testmsg(struct msg_msg *msg, long type, int mode) { switch(mode) @@ -640,10 +673,11 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, msg->m_type = mtype; msg->m_ts = msgsz; - msq = msg_lock_check(ns, msqid); + rcu_read_lock(); + msq = msq_obtain_object_check(ns, msqid); if (IS_ERR(msq)) { err = PTR_ERR(msq); - goto out_free; + goto out_unlock1; } for (;;) { @@ -651,11 +685,11 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, err = -EACCES; if (ipcperms(ns, &msq->q_perm, S_IWUGO)) - goto out_unlock_free; + goto out_unlock1; err = security_msg_queue_msgsnd(msq, msg, msgflg); if (err) - goto out_unlock_free; + goto out_unlock1; if (msgsz + msq->q_cbytes <= msq->q_qbytes && 1 + msq->q_qnum <= msq->q_qbytes) { @@ -665,32 +699,41 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, /* queue full, wait: */ if (msgflg & IPC_NOWAIT) { err = -EAGAIN; - goto out_unlock_free; + goto out_unlock1; } + + ipc_lock_object(&msq->q_perm); ss_add(msq, &s); if (!ipc_rcu_getref(msq)) { err = -EIDRM; - goto out_unlock_free; + goto out_unlock0; } - msg_unlock(msq); + ipc_unlock_object(&msq->q_perm); + rcu_read_unlock(); schedule(); - ipc_lock_by_ptr(&msq->q_perm); + rcu_read_lock(); + ipc_lock_object(&msq->q_perm); + ipc_rcu_putref(msq); if (msq->q_perm.deleted) { err = -EIDRM; - goto out_unlock_free; + goto out_unlock0; } + ss_del(&s); if (signal_pending(current)) { err = -ERESTARTNOHAND; - goto out_unlock_free; + goto out_unlock0; } + + ipc_unlock_object(&msq->q_perm); } + ipc_lock_object(&msq->q_perm); msq->q_lspid = task_tgid_vnr(current); msq->q_stime = get_seconds(); @@ -706,9 +749,10 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, err = 0; msg = NULL; -out_unlock_free: - msg_unlock(msq); -out_free: +out_unlock0: + ipc_unlock_object(&msq->q_perm); +out_unlock1: + rcu_read_unlock(); if (msg != NULL) free_msg(msg); return err; @@ -816,21 +860,19 @@ static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) return ERR_PTR(-EAGAIN); } - -long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, - int msgflg, +long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, long (*msg_handler)(void __user *, struct msg_msg *, size_t)) { - struct msg_queue *msq; - struct msg_msg *msg; int mode; + struct msg_queue *msq; struct ipc_namespace *ns; - struct msg_msg *copy = NULL; + struct msg_msg *msg, *copy = NULL; ns = current->nsproxy->ipc_ns; if (msqid < 0 || (long) bufsz < 0) return -EINVAL; + if (msgflg & MSG_COPY) { copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); if (IS_ERR(copy)) @@ -838,8 +880,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, } mode = convert_mode(&msgtyp, msgflg); - msq = msg_lock_check(ns, msqid); + rcu_read_lock(); + msq = msq_obtain_object_check(ns, msqid); if (IS_ERR(msq)) { + rcu_read_unlock(); free_copy(copy); return PTR_ERR(msq); } @@ -849,10 +893,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, msg = ERR_PTR(-EACCES); if (ipcperms(ns, &msq->q_perm, S_IRUGO)) - goto out_unlock; + goto out_unlock1; + ipc_lock_object(&msq->q_perm); msg = find_msg(msq, &msgtyp, mode); - if (!IS_ERR(msg)) { /* * Found a suitable message. @@ -860,7 +904,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, */ if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { msg = ERR_PTR(-E2BIG); - goto out_unlock; + goto out_unlock0; } /* * If we are copying, then do not unlink message and do @@ -868,8 +912,9 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, */ if (msgflg & MSG_COPY) { msg = copy_msg(msg, copy); - goto out_unlock; + goto out_unlock0; } + list_del(&msg->m_list); msq->q_qnum--; msq->q_rtime = get_seconds(); @@ -878,14 +923,16 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, atomic_sub(msg->m_ts, &ns->msg_bytes); atomic_dec(&ns->msg_hdrs); ss_wakeup(&msq->q_senders, 0); - msg_unlock(msq); - break; + + goto out_unlock0; } + /* No message waiting. Wait for a message */ if (msgflg & IPC_NOWAIT) { msg = ERR_PTR(-ENOMSG); - goto out_unlock; + goto out_unlock0; } + list_add_tail(&msr_d.r_list, &msq->q_receivers); msr_d.r_tsk = current; msr_d.r_msgtype = msgtyp; @@ -896,8 +943,9 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, msr_d.r_maxsize = bufsz; msr_d.r_msg = ERR_PTR(-EAGAIN); current->state = TASK_INTERRUPTIBLE; - msg_unlock(msq); + ipc_unlock_object(&msq->q_perm); + rcu_read_unlock(); schedule(); /* Lockless receive, part 1: @@ -908,7 +956,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, * Prior to destruction, expunge_all(-EIRDM) changes r_msg. * Thus if r_msg is -EAGAIN, then the queue not yet destroyed. * rcu_read_lock() prevents preemption between reading r_msg - * and the spin_lock() inside ipc_lock_by_ptr(). + * and acquiring the q_perm.lock in ipc_lock_object(). */ rcu_read_lock(); @@ -927,32 +975,34 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, * If there is a message or an error then accept it without * locking. */ - if (msg != ERR_PTR(-EAGAIN)) { - rcu_read_unlock(); - break; - } + if (msg != ERR_PTR(-EAGAIN)) + goto out_unlock1; /* Lockless receive, part 3: * Acquire the queue spinlock. */ - ipc_lock_by_ptr(&msq->q_perm); - rcu_read_unlock(); + ipc_lock_object(&msq->q_perm); /* Lockless receive, part 4: * Repeat test after acquiring the spinlock. */ msg = (struct msg_msg*)msr_d.r_msg; if (msg != ERR_PTR(-EAGAIN)) - goto out_unlock; + goto out_unlock0; list_del(&msr_d.r_list); if (signal_pending(current)) { msg = ERR_PTR(-ERESTARTNOHAND); -out_unlock: - msg_unlock(msq); - break; + goto out_unlock0; } + + ipc_unlock_object(&msq->q_perm); } + +out_unlock0: + ipc_unlock_object(&msq->q_perm); +out_unlock1: + rcu_read_unlock(); if (IS_ERR(msg)) { free_copy(copy); return PTR_ERR(msg); @@ -95,8 +95,12 @@ struct sem { int semval; /* current value */ int sempid; /* pid of last operation */ spinlock_t lock; /* spinlock for fine-grained semtimedop */ - struct list_head sem_pending; /* pending single-sop operations */ -}; + struct list_head pending_alter; /* pending single-sop operations */ + /* that alter the semaphore */ + struct list_head pending_const; /* pending single-sop operations */ + /* that do not alter the semaphore*/ + time_t sem_otime; /* candidate for sem_otime */ +} ____cacheline_aligned_in_smp; /* One queue for each sleeping process in the system. */ struct sem_queue { @@ -150,12 +154,15 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it); #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ /* - * linked list protection: + * Locking: * sem_undo.id_next, - * sem_array.sem_pending{,last}, - * sem_array.sem_undo: sem_lock() for read/write + * sem_array.complex_count, + * sem_array.pending{_alter,_cont}, + * sem_array.sem_undo: global sem_lock() for read/write * sem_undo.proc_next: only "current" is allowed to read/write that field. * + * sem_array.sem_base[i].pending_{const,alter}: + * global or semaphore sem_lock() for read/write */ #define sc_semmsl sem_ctls[0] @@ -189,6 +196,53 @@ void __init sem_init (void) IPC_SEM_IDS, sysvipc_sem_proc_show); } +/** + * unmerge_queues - unmerge queues, if possible. + * @sma: semaphore array + * + * The function unmerges the wait queues if complex_count is 0. + * It must be called prior to dropping the global semaphore array lock. + */ +static void unmerge_queues(struct sem_array *sma) +{ + struct sem_queue *q, *tq; + + /* complex operations still around? */ + if (sma->complex_count) + return; + /* + * We will switch back to simple mode. + * Move all pending operation back into the per-semaphore + * queues. + */ + list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { + struct sem *curr; + curr = &sma->sem_base[q->sops[0].sem_num]; + + list_add_tail(&q->list, &curr->pending_alter); + } + INIT_LIST_HEAD(&sma->pending_alter); +} + +/** + * merge_queues - Merge single semop queues into global queue + * @sma: semaphore array + * + * This function merges all per-semaphore queues into the global queue. + * It is necessary to achieve FIFO ordering for the pending single-sop + * operations when a multi-semop operation must sleep. + * Only the alter operations must be moved, the const operations can stay. + */ +static void merge_queues(struct sem_array *sma) +{ + int i; + for (i = 0; i < sma->sem_nsems; i++) { + struct sem *sem = sma->sem_base + i; + + list_splice_init(&sem->pending_alter, &sma->pending_alter); + } +} + /* * If the request contains only one semaphore operation, and there are * no complex transactions pending, lock only the semaphore involved. @@ -246,7 +300,7 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, * their critical section while the array lock is held. */ lock_array: - spin_lock(&sma->sem_perm.lock); + ipc_lock_object(&sma->sem_perm); for (i = 0; i < sma->sem_nsems; i++) { struct sem *sem = sma->sem_base + i; spin_unlock_wait(&sem->lock); @@ -259,7 +313,8 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, static inline void sem_unlock(struct sem_array *sma, int locknum) { if (locknum == -1) { - spin_unlock(&sma->sem_perm.lock); + unmerge_queues(sma); + ipc_unlock_object(&sma->sem_perm); } else { struct sem *sem = sma->sem_base + locknum; spin_unlock(&sem->lock); @@ -337,7 +392,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) * Without the check/retry algorithm a lockless wakeup is possible: * - queue.status is initialized to -EINTR before blocking. * - wakeup is performed by - * * unlinking the queue entry from sma->sem_pending + * * unlinking the queue entry from the pending list * * setting queue.status to IN_WAKEUP * This is the notification for the blocked thread that a * result value is imminent. @@ -418,12 +473,14 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) sma->sem_base = (struct sem *) &sma[1]; for (i = 0; i < nsems; i++) { - INIT_LIST_HEAD(&sma->sem_base[i].sem_pending); + INIT_LIST_HEAD(&sma->sem_base[i].pending_alter); + INIT_LIST_HEAD(&sma->sem_base[i].pending_const); spin_lock_init(&sma->sem_base[i].lock); } sma->complex_count = 0; - INIT_LIST_HEAD(&sma->sem_pending); + INIT_LIST_HEAD(&sma->pending_alter); + INIT_LIST_HEAD(&sma->pending_const); INIT_LIST_HEAD(&sma->list_id); sma->sem_nsems = nsems; sma->sem_ctime = get_seconds(); @@ -482,12 +539,19 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); } -/* - * Determine whether a sequence of semaphore operations would succeed - * all at once. Return 0 if yes, 1 if need to sleep, else return error code. +/** perform_atomic_semop - Perform (if possible) a semaphore operation + * @sma: semaphore array + * @sops: array with operations that should be checked + * @nsems: number of sops + * @un: undo array + * @pid: pid that did the change + * + * Returns 0 if the operation was possible. + * Returns 1 if the operation is impossible, the caller must sleep. + * Negative values are error codes. */ -static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, +static int perform_atomic_semop(struct sem_array *sma, struct sembuf *sops, int nsops, struct sem_undo *un, int pid) { int result, sem_op; @@ -609,60 +673,132 @@ static void unlink_queue(struct sem_array *sma, struct sem_queue *q) * update_queue is O(N^2) when it restarts scanning the whole queue of * waiting operations. Therefore this function checks if the restart is * really necessary. It is called after a previously waiting operation - * was completed. + * modified the array. + * Note that wait-for-zero operations are handled without restart. */ static int check_restart(struct sem_array *sma, struct sem_queue *q) { - struct sem *curr; - struct sem_queue *h; - - /* if the operation didn't modify the array, then no restart */ - if (q->alter == 0) - return 0; - - /* pending complex operations are too difficult to analyse */ - if (sma->complex_count) + /* pending complex alter operations are too difficult to analyse */ + if (!list_empty(&sma->pending_alter)) return 1; /* we were a sleeping complex operation. Too difficult */ if (q->nsops > 1) return 1; - curr = sma->sem_base + q->sops[0].sem_num; + /* It is impossible that someone waits for the new value: + * - complex operations always restart. + * - wait-for-zero are handled seperately. + * - q is a previously sleeping simple operation that + * altered the array. It must be a decrement, because + * simple increments never sleep. + * - If there are older (higher priority) decrements + * in the queue, then they have observed the original + * semval value and couldn't proceed. The operation + * decremented to value - thus they won't proceed either. + */ + return 0; +} + +/** + * wake_const_ops(sma, semnum, pt) - Wake up non-alter tasks + * @sma: semaphore array. + * @semnum: semaphore that was modified. + * @pt: list head for the tasks that must be woken up. + * + * wake_const_ops must be called after a semaphore in a semaphore array + * was set to 0. If complex const operations are pending, wake_const_ops must + * be called with semnum = -1, as well as with the number of each modified + * semaphore. + * The tasks that must be woken up are added to @pt. The return code + * is stored in q->pid. + * The function returns 1 if at least one operation was completed successfully. + */ +static int wake_const_ops(struct sem_array *sma, int semnum, + struct list_head *pt) +{ + struct sem_queue *q; + struct list_head *walk; + struct list_head *pending_list; + int semop_completed = 0; - /* No-one waits on this queue */ - if (list_empty(&curr->sem_pending)) - return 0; + if (semnum == -1) + pending_list = &sma->pending_const; + else + pending_list = &sma->sem_base[semnum].pending_const; + + walk = pending_list->next; + while (walk != pending_list) { + int error; + + q = container_of(walk, struct sem_queue, list); + walk = walk->next; + + error = perform_atomic_semop(sma, q->sops, q->nsops, + q->undo, q->pid); - /* the new semaphore value */ - if (curr->semval) { - /* It is impossible that someone waits for the new value: - * - q is a previously sleeping simple operation that - * altered the array. It must be a decrement, because - * simple increments never sleep. - * - The value is not 0, thus wait-for-zero won't proceed. - * - If there are older (higher priority) decrements - * in the queue, then they have observed the original - * semval value and couldn't proceed. The operation - * decremented to value - thus they won't proceed either. + if (error <= 0) { + /* operation completed, remove from queue & wakeup */ + + unlink_queue(sma, q); + + wake_up_sem_queue_prepare(pt, q, error); + if (error == 0) + semop_completed = 1; + } + } + return semop_completed; +} + +/** + * do_smart_wakeup_zero(sma, sops, nsops, pt) - wakeup all wait for zero tasks + * @sma: semaphore array + * @sops: operations that were performed + * @nsops: number of operations + * @pt: list head of the tasks that must be woken up. + * + * do_smart_wakeup_zero() checks all required queue for wait-for-zero + * operations, based on the actual changes that were performed on the + * semaphore array. + * The function returns 1 if at least one operation was completed successfully. + */ +static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, + int nsops, struct list_head *pt) +{ + int i; + int semop_completed = 0; + int got_zero = 0; + + /* first: the per-semaphore queues, if known */ + if (sops) { + for (i = 0; i < nsops; i++) { + int num = sops[i].sem_num; + + if (sma->sem_base[num].semval == 0) { + got_zero = 1; + semop_completed |= wake_const_ops(sma, num, pt); + } + } + } else { + /* + * No sops means modified semaphores not known. + * Assume all were changed. */ - BUG_ON(q->sops[0].sem_op >= 0); - return 0; + for (i = 0; i < sma->sem_nsems; i++) { + if (sma->sem_base[i].semval == 0) { + got_zero = 1; + semop_completed |= wake_const_ops(sma, i, pt); + } + } } /* - * semval is 0. Check if there are wait-for-zero semops. - * They must be the first entries in the per-semaphore queue + * If one of the modified semaphores got 0, + * then check the global queue, too. */ - h = list_first_entry(&curr->sem_pending, struct sem_queue, list); - BUG_ON(h->nsops != 1); - BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num); - - /* Yes, there is a wait-for-zero semop. Restart */ - if (h->sops[0].sem_op == 0) - return 1; + if (got_zero) + semop_completed |= wake_const_ops(sma, -1, pt); - /* Again - no-one is waiting for the new value. */ - return 0; + return semop_completed; } @@ -678,6 +814,8 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q) * semaphore. * The tasks that must be woken up are added to @pt. The return code * is stored in q->pid. + * The function internally checks if const operations can now succeed. + * * The function return 1 if at least one semop was completed successfully. */ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) @@ -688,9 +826,9 @@ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) int semop_completed = 0; if (semnum == -1) - pending_list = &sma->sem_pending; + pending_list = &sma->pending_alter; else - pending_list = &sma->sem_base[semnum].sem_pending; + pending_list = &sma->sem_base[semnum].pending_alter; again: walk = pending_list->next; @@ -702,16 +840,15 @@ again: /* If we are scanning the single sop, per-semaphore list of * one semaphore and that semaphore is 0, then it is not - * necessary to scan the "alter" entries: simple increments + * necessary to scan further: simple increments * that affect only one entry succeed immediately and cannot * be in the per semaphore pending queue, and decrements * cannot be successful if the value is already 0. */ - if (semnum != -1 && sma->sem_base[semnum].semval == 0 && - q->alter) + if (semnum != -1 && sma->sem_base[semnum].semval == 0) break; - error = try_atomic_semop(sma, q->sops, q->nsops, + error = perform_atomic_semop(sma, q->sops, q->nsops, q->undo, q->pid); /* Does q->sleeper still need to sleep? */ @@ -724,6 +861,7 @@ again: restart = 0; } else { semop_completed = 1; + do_smart_wakeup_zero(sma, q->sops, q->nsops, pt); restart = check_restart(sma, q); } @@ -742,8 +880,8 @@ again: * @otime: force setting otime * @pt: list head of the tasks that must be woken up. * - * do_smart_update() does the required called to update_queue, based on the - * actual changes that were performed on the semaphore array. + * do_smart_update() does the required calls to update_queue and wakeup_zero, + * based on the actual changes that were performed on the semaphore array. * Note that the function does not do the actual wake-up: the caller is * responsible for calling wake_up_sem_queue_do(@pt). * It is safe to perform this call after dropping all locks. @@ -752,49 +890,46 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop int otime, struct list_head *pt) { int i; - int progress; - - progress = 1; -retry_global: - if (sma->complex_count) { - if (update_queue(sma, -1, pt)) { - progress = 1; - otime = 1; - sops = NULL; - } - } - if (!progress) - goto done; - if (!sops) { - /* No semops; something special is going on. */ - for (i = 0; i < sma->sem_nsems; i++) { - if (update_queue(sma, i, pt)) { - otime = 1; - progress = 1; - } - } - goto done_checkretry; - } + otime |= do_smart_wakeup_zero(sma, sops, nsops, pt); - /* Check the semaphores that were modified. */ - for (i = 0; i < nsops; i++) { - if (sops[i].sem_op > 0 || - (sops[i].sem_op < 0 && - sma->sem_base[sops[i].sem_num].semval == 0)) - if (update_queue(sma, sops[i].sem_num, pt)) { - otime = 1; - progress = 1; + if (!list_empty(&sma->pending_alter)) { + /* semaphore array uses the global queue - just process it. */ + otime |= update_queue(sma, -1, pt); + } else { + if (!sops) { + /* + * No sops, thus the modified semaphores are not + * known. Check all. + */ + for (i = 0; i < sma->sem_nsems; i++) + otime |= update_queue(sma, i, pt); + } else { + /* + * Check the semaphores that were increased: + * - No complex ops, thus all sleeping ops are + * decrease. + * - if we decreased the value, then any sleeping + * semaphore ops wont be able to run: If the + * previous value was too small, then the new + * value will be too small, too. + */ + for (i = 0; i < nsops; i++) { + if (sops[i].sem_op > 0) { + otime |= update_queue(sma, + sops[i].sem_num, pt); + } } + } } -done_checkretry: - if (progress) { - progress = 0; - goto retry_global; + if (otime) { + if (sops == NULL) { + sma->sem_base[0].sem_otime = get_seconds(); + } else { + sma->sem_base[sops[0].sem_num].sem_otime = + get_seconds(); + } } -done: - if (otime) - sma->sem_otime = get_seconds(); } @@ -813,14 +948,14 @@ static int count_semncnt (struct sem_array * sma, ushort semnum) struct sem_queue * q; semncnt = 0; - list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { + list_for_each_entry(q, &sma->sem_base[semnum].pending_alter, list) { struct sembuf * sops = q->sops; BUG_ON(sops->sem_num != semnum); if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT)) semncnt++; } - list_for_each_entry(q, &sma->sem_pending, list) { + list_for_each_entry(q, &sma->pending_alter, list) { struct sembuf * sops = q->sops; int nsops = q->nsops; int i; @@ -839,14 +974,14 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum) struct sem_queue * q; semzcnt = 0; - list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { + list_for_each_entry(q, &sma->sem_base[semnum].pending_const, list) { struct sembuf * sops = q->sops; BUG_ON(sops->sem_num != semnum); if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT)) semzcnt++; } - list_for_each_entry(q, &sma->sem_pending, list) { + list_for_each_entry(q, &sma->pending_const, list) { struct sembuf * sops = q->sops; int nsops = q->nsops; int i; @@ -872,7 +1007,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) int i; /* Free the existing undo structures for this semaphore set. */ - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { list_del(&un->list_id); spin_lock(&un->ulp->lock); @@ -884,13 +1019,22 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) /* Wake up all pending processes and let them fail with EIDRM. */ INIT_LIST_HEAD(&tasks); - list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { + list_for_each_entry_safe(q, tq, &sma->pending_const, list) { + unlink_queue(sma, q); + wake_up_sem_queue_prepare(&tasks, q, -EIDRM); + } + + list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { unlink_queue(sma, q); wake_up_sem_queue_prepare(&tasks, q, -EIDRM); } for (i = 0; i < sma->sem_nsems; i++) { struct sem *sem = sma->sem_base + i; - list_for_each_entry_safe(q, tq, &sem->sem_pending, list) { + list_for_each_entry_safe(q, tq, &sem->pending_const, list) { + unlink_queue(sma, q); + wake_up_sem_queue_prepare(&tasks, q, -EIDRM); + } + list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { unlink_queue(sma, q); wake_up_sem_queue_prepare(&tasks, q, -EIDRM); } @@ -931,6 +1075,21 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, } } +static time_t get_semotime(struct sem_array *sma) +{ + int i; + time_t res; + + res = sma->sem_base[0].sem_otime; + for (i = 1; i < sma->sem_nsems; i++) { + time_t to = sma->sem_base[i].sem_otime; + + if (to > res) + res = to; + } + return res; +} + static int semctl_nolock(struct ipc_namespace *ns, int semid, int cmd, int version, void __user *p) { @@ -1004,9 +1163,9 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, goto out_unlock; kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); - tbuf.sem_otime = sma->sem_otime; - tbuf.sem_ctime = sma->sem_ctime; - tbuf.sem_nsems = sma->sem_nsems; + tbuf.sem_otime = get_semotime(sma); + tbuf.sem_ctime = sma->sem_ctime; + tbuf.sem_nsems = sma->sem_nsems; rcu_read_unlock(); if (copy_semid_to_user(p, &tbuf, version)) return -EFAULT; @@ -1070,7 +1229,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, curr = &sma->sem_base[semnum]; - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_for_each_entry(un, &sma->list_id, list_id) un->semadj[semnum] = 0; @@ -1199,7 +1358,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, for (i = 0; i < nsems; i++) sma->sem_base[i].semval = sem_io[i]; - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_for_each_entry(un, &sma->list_id, list_id) { for (i = 0; i < nsems; i++) un->semadj[i] = 0; @@ -1289,39 +1448,43 @@ static int semctl_down(struct ipc_namespace *ns, int semid, return -EFAULT; } + down_write(&sem_ids(ns).rw_mutex); + rcu_read_lock(); + ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, &semid64.sem_perm, 0); - if (IS_ERR(ipcp)) - return PTR_ERR(ipcp); + if (IS_ERR(ipcp)) { + err = PTR_ERR(ipcp); + goto out_unlock1; + } sma = container_of(ipcp, struct sem_array, sem_perm); err = security_sem_semctl(sma, cmd); - if (err) { - rcu_read_unlock(); - goto out_up; - } + if (err) + goto out_unlock1; - switch(cmd){ + switch (cmd) { case IPC_RMID: sem_lock(sma, NULL, -1); + /* freeary unlocks the ipc object and rcu */ freeary(ns, ipcp); goto out_up; case IPC_SET: sem_lock(sma, NULL, -1); err = ipc_update_perm(&semid64.sem_perm, ipcp); if (err) - goto out_unlock; + goto out_unlock0; sma->sem_ctime = get_seconds(); break; default: - rcu_read_unlock(); err = -EINVAL; - goto out_up; + goto out_unlock1; } -out_unlock: +out_unlock0: sem_unlock(sma, -1); +out_unlock1: rcu_read_unlock(); out_up: up_write(&sem_ids(ns).rw_mutex); @@ -1496,7 +1659,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) new->semid = semid; assert_spin_locked(&ulp->lock); list_add_rcu(&new->list_proc, &ulp->list_proc); - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_add(&new->list_id, &sma->list_id); un = new; @@ -1533,7 +1696,6 @@ static int get_queue_result(struct sem_queue *q) return error; } - SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, unsigned, nsops, const struct timespec __user *, timeout) { @@ -1631,7 +1793,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, if (un && un->semid == -1) goto out_unlock_free; - error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); + error = perform_atomic_semop(sma, sops, nsops, un, + task_tgid_vnr(current)); if (error <= 0) { if (alter && error == 0) do_smart_update(sma, sops, nsops, 1, &tasks); @@ -1653,15 +1816,27 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, struct sem *curr; curr = &sma->sem_base[sops->sem_num]; - if (alter) - list_add_tail(&queue.list, &curr->sem_pending); - else - list_add(&queue.list, &curr->sem_pending); + if (alter) { + if (sma->complex_count) { + list_add_tail(&queue.list, + &sma->pending_alter); + } else { + + list_add_tail(&queue.list, + &curr->pending_alter); + } + } else { + list_add_tail(&queue.list, &curr->pending_const); + } } else { + if (!sma->complex_count) + merge_queues(sma); + if (alter) - list_add_tail(&queue.list, &sma->sem_pending); + list_add_tail(&queue.list, &sma->pending_alter); else - list_add(&queue.list, &sma->sem_pending); + list_add_tail(&queue.list, &sma->pending_const); + sma->complex_count++; } @@ -1833,7 +2008,7 @@ void exit_sem(struct task_struct *tsk) } /* remove un from the linked lists */ - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_del(&un->list_id); spin_lock(&ulp->lock); @@ -1882,6 +2057,9 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it) { struct user_namespace *user_ns = seq_user_ns(s); struct sem_array *sma = it; + time_t sem_otime; + + sem_otime = get_semotime(sma); return seq_printf(s, "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n", @@ -1893,7 +2071,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it) from_kgid_munged(user_ns, sma->sem_perm.gid), from_kuid_munged(user_ns, sma->sem_perm.cuid), from_kgid_munged(user_ns, sma->sem_perm.cgid), - sma->sem_otime, + sem_otime, sma->sem_ctime); } #endif @@ -141,7 +141,7 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) { rcu_read_lock(); - spin_lock(&ipcp->shm_perm.lock); + ipc_lock_object(&ipcp->shm_perm); } static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns, @@ -491,10 +491,10 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) sprintf (name, "SYSV%08x", key); if (shmflg & SHM_HUGETLB) { - struct hstate *hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) - & SHM_HUGE_MASK); + struct hstate *hs; size_t hugesize; + hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); if (!hs) { error = -EINVAL; goto no_file; @@ -535,6 +535,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_nattch = 0; shp->shm_file = file; shp->shm_creator = current; + /* * shmid gets reported as "inode#" in /proc/pid/maps. * proc-ps tools use this. Changing this will break them. @@ -543,7 +544,9 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) ns->shm_tot += numpages; error = shp->shm_perm.id; - shm_unlock(shp); + + ipc_unlock_object(&shp->shm_perm); + rcu_read_unlock(); return error; no_id: @@ -754,31 +757,42 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, return -EFAULT; } + down_write(&shm_ids(ns).rw_mutex); + rcu_read_lock(); + ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd, &shmid64.shm_perm, 0); - if (IS_ERR(ipcp)) - return PTR_ERR(ipcp); + if (IS_ERR(ipcp)) { + err = PTR_ERR(ipcp); + /* the ipc lock is not held upon failure */ + goto out_unlock1; + } shp = container_of(ipcp, struct shmid_kernel, shm_perm); err = security_shm_shmctl(shp, cmd); if (err) - goto out_unlock; + goto out_unlock0; + switch (cmd) { case IPC_RMID: + /* do_shm_rmid unlocks the ipc object and rcu */ do_shm_rmid(ns, ipcp); goto out_up; case IPC_SET: err = ipc_update_perm(&shmid64.shm_perm, ipcp); if (err) - goto out_unlock; + goto out_unlock0; shp->shm_ctim = get_seconds(); break; default: err = -EINVAL; } -out_unlock: - shm_unlock(shp); + +out_unlock0: + ipc_unlock_object(&shp->shm_perm); +out_unlock1: + rcu_read_unlock(); out_up: up_write(&shm_ids(ns).rw_mutex); return err; @@ -246,9 +246,8 @@ int ipc_get_maxid(struct ipc_ids *ids) * is returned. The 'new' entry is returned in a locked state on success. * On failure the entry is not locked and a negative err-code is returned. * - * Called with ipc_ids.rw_mutex held as a writer. + * Called with writer ipc_ids.rw_mutex held. */ - int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) { kuid_t euid; @@ -469,9 +468,7 @@ void ipc_free(void* ptr, int size) struct ipc_rcu { struct rcu_head rcu; atomic_t refcount; - /* "void *" makes sure alignment of following data is sane. */ - void *data[0]; -}; +} ____cacheline_aligned_in_smp; /** * ipc_rcu_alloc - allocate ipc and rcu space @@ -489,12 +486,14 @@ void *ipc_rcu_alloc(int size) if (unlikely(!out)) return NULL; atomic_set(&out->refcount, 1); - return out->data; + return out + 1; } int ipc_rcu_getref(void *ptr) { - return atomic_inc_not_zero(&container_of(ptr, struct ipc_rcu, data)->refcount); + struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; + + return atomic_inc_not_zero(&p->refcount); } /** @@ -508,7 +507,7 @@ static void ipc_schedule_free(struct rcu_head *head) void ipc_rcu_putref(void *ptr) { - struct ipc_rcu *p = container_of(ptr, struct ipc_rcu, data); + struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; if (!atomic_dec_and_test(&p->refcount)) return; @@ -747,8 +746,10 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) * It must be called without any lock held and * - retrieves the ipc with the given id in the given table. * - performs some audit and permission check, depending on the given cmd - * - returns the ipc with both ipc and rw_mutex locks held in case of success + * - returns the ipc with the ipc lock held in case of success * or an err-code without any lock held otherwise. + * + * Call holding the both the rw_mutex and the rcu read lock. */ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, struct ipc_ids *ids, int id, int cmd, @@ -773,13 +774,10 @@ struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, int err = -EPERM; struct kern_ipc_perm *ipcp; - down_write(&ids->rw_mutex); - rcu_read_lock(); - ipcp = ipc_obtain_object_check(ids, id); if (IS_ERR(ipcp)) { err = PTR_ERR(ipcp); - goto out_up; + goto err; } audit_ipc_obj(ipcp); @@ -790,16 +788,8 @@ struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, euid = current_euid(); if (uid_eq(euid, ipcp->cuid) || uid_eq(euid, ipcp->uid) || ns_capable(ns->user_ns, CAP_SYS_ADMIN)) - return ipcp; - -out_up: - /* - * Unsuccessful lookup, unlock and return - * the corresponding error. - */ - rcu_read_unlock(); - up_write(&ids->rw_mutex); - + return ipcp; /* successful lookup */ +err: return ERR_PTR(err); } @@ -159,21 +159,31 @@ static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid) return uid / SEQ_MULTIPLIER != ipcp->seq; } -static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm) +static inline void ipc_lock_object(struct kern_ipc_perm *perm) { - rcu_read_lock(); spin_lock(&perm->lock); } -static inline void ipc_unlock(struct kern_ipc_perm *perm) +static inline void ipc_unlock_object(struct kern_ipc_perm *perm) { spin_unlock(&perm->lock); - rcu_read_unlock(); } -static inline void ipc_lock_object(struct kern_ipc_perm *perm) +static inline void ipc_assert_locked_object(struct kern_ipc_perm *perm) { - spin_lock(&perm->lock); + assert_spin_locked(&perm->lock); +} + +static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm) +{ + rcu_read_lock(); + ipc_lock_object(perm); +} + +static inline void ipc_unlock(struct kern_ipc_perm *perm) +{ + ipc_unlock_object(perm); + rcu_read_unlock(); } struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id); diff --git a/kernel/Makefile b/kernel/Makefile index 271fd31..470839d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ - notifier.o ksysfs.o cred.o \ + notifier.o ksysfs.o cred.o reboot.o \ async.o range.o groups.o lglock.o smpboot.o ifdef CONFIG_FUNCTION_TRACER diff --git a/kernel/audit.h b/kernel/audit.h index 1c95131..123c9b7 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -85,6 +85,7 @@ struct audit_names { struct filename *name; int name_len; /* number of chars to log */ + bool hidden; /* don't log this record */ bool name_put; /* call __putname()? */ unsigned long ino; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 6bd4a90..f7aee8b 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -423,7 +423,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, f->lsm_rule = NULL; /* Support legacy tests for a valid loginuid */ - if ((f->type == AUDIT_LOGINUID) && (f->val == 4294967295)) { + if ((f->type == AUDIT_LOGINUID) && (f->val == ~0U)) { f->type = AUDIT_LOGINUID_SET; f->val = 0; } @@ -865,6 +865,12 @@ static inline int audit_add_rule(struct audit_entry *entry) err = audit_add_watch(&entry->rule, &list); if (err) { mutex_unlock(&audit_filter_mutex); + /* + * normally audit_add_tree_rule() will free it + * on failure + */ + if (tree) + audit_put_tree(tree); goto error; } } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 3c8a601..9845cb3 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1399,8 +1399,11 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts } i = 0; - list_for_each_entry(n, &context->names_list, list) + list_for_each_entry(n, &context->names_list, list) { + if (n->hidden) + continue; audit_log_name(context, n, NULL, i++, &call_panic); + } /* Send end of event record to help user space know we are finished */ ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE); @@ -1769,14 +1772,15 @@ void audit_putname(struct filename *name) * __audit_inode - store the inode and device from a lookup * @name: name being audited * @dentry: dentry being audited - * @parent: does this dentry represent the parent? + * @flags: attributes for this particular entry */ void __audit_inode(struct filename *name, const struct dentry *dentry, - unsigned int parent) + unsigned int flags) { struct audit_context *context = current->audit_context; const struct inode *inode = dentry->d_inode; struct audit_names *n; + bool parent = flags & AUDIT_INODE_PARENT; if (!context->in_syscall) return; @@ -1831,6 +1835,8 @@ out: if (parent) { n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL; n->type = AUDIT_TYPE_PARENT; + if (flags & AUDIT_INODE_HIDDEN) + n->hidden = true; } else { n->name_len = AUDIT_NAME_FULL; n->type = AUDIT_TYPE_NORMAL; diff --git a/kernel/exit.c b/kernel/exit.c index fafe75d..a949819 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -808,7 +808,7 @@ void do_exit(long code) /* * FIXME: do that only when needed, using sched_exit tracepoint */ - ptrace_put_breakpoints(tsk); + flush_ptrace_hw_breakpoint(tsk); exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA diff --git a/kernel/panic.c b/kernel/panic.c index 167ec097..9771231 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -399,8 +399,9 @@ struct slowpath_args { static void warn_slowpath_common(const char *file, int line, void *caller, unsigned taint, struct slowpath_args *args) { - printk(KERN_WARNING "------------[ cut here ]------------\n"); - printk(KERN_WARNING "WARNING: at %s:%d %pS()\n", file, line, caller); + pr_warn("------------[ cut here ]------------\n"); + pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS()\n", + raw_smp_processor_id(), current->pid, file, line, caller); if (args) vprintk(args->fmt, args->args); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index ba5e6ce..4041f57 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -469,6 +469,7 @@ static int ptrace_detach(struct task_struct *child, unsigned int data) /* Architecture-specific hardware disable .. */ ptrace_disable(child); clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + flush_ptrace_hw_breakpoint(child); write_lock_irq(&tasklist_lock); /* @@ -1221,19 +1222,3 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, return ret; } #endif /* CONFIG_COMPAT */ - -#ifdef CONFIG_HAVE_HW_BREAKPOINT -int ptrace_get_breakpoints(struct task_struct *tsk) -{ - if (atomic_inc_not_zero(&tsk->ptrace_bp_refcnt)) - return 0; - - return -1; -} - -void ptrace_put_breakpoints(struct task_struct *tsk) -{ - if (atomic_dec_and_test(&tsk->ptrace_bp_refcnt)) - flush_ptrace_hw_breakpoint(tsk); -} -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ diff --git a/kernel/reboot.c b/kernel/reboot.c new file mode 100644 index 0000000..269ed93 --- /dev/null +++ b/kernel/reboot.c @@ -0,0 +1,419 @@ +/* + * linux/kernel/reboot.c + * + * Copyright (C) 2013 Linus Torvalds + */ + +#define pr_fmt(fmt) "reboot: " fmt + +#include <linux/ctype.h> +#include <linux/export.h> +#include <linux/kexec.h> +#include <linux/kmod.h> +#include <linux/kmsg_dump.h> +#include <linux/reboot.h> +#include <linux/suspend.h> +#include <linux/syscalls.h> +#include <linux/syscore_ops.h> +#include <linux/uaccess.h> + +/* + * this indicates whether you can reboot with ctrl-alt-del: the default is yes + */ + +int C_A_D = 1; +struct pid *cad_pid; +EXPORT_SYMBOL(cad_pid); + +#if defined(CONFIG_ARM) || defined(CONFIG_UNICORE32) +#define DEFAULT_REBOOT_MODE = REBOOT_HARD +#else +#define DEFAULT_REBOOT_MODE +#endif +enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE; + +int reboot_default; +int reboot_cpu; +enum reboot_type reboot_type = BOOT_ACPI; +int reboot_force; + +/* + * If set, this is used for preparing the system to power off. + */ + +void (*pm_power_off_prepare)(void); + +/** + * emergency_restart - reboot the system + * + * Without shutting down any hardware or taking any locks + * reboot the system. This is called when we know we are in + * trouble so this is our best effort to reboot. This is + * safe to call in interrupt context. + */ +void emergency_restart(void) +{ + kmsg_dump(KMSG_DUMP_EMERG); + machine_emergency_restart(); +} +EXPORT_SYMBOL_GPL(emergency_restart); + +void kernel_restart_prepare(char *cmd) +{ + blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); + system_state = SYSTEM_RESTART; + usermodehelper_disable(); + device_shutdown(); +} + +/** + * register_reboot_notifier - Register function to be called at reboot time + * @nb: Info about notifier function to be called + * + * Registers a function with the list of functions + * to be called at reboot time. + * + * Currently always returns zero, as blocking_notifier_chain_register() + * always returns zero. + */ +int register_reboot_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&reboot_notifier_list, nb); +} +EXPORT_SYMBOL(register_reboot_notifier); + +/** + * unregister_reboot_notifier - Unregister previously registered reboot notifier + * @nb: Hook to be unregistered + * + * Unregisters a previously registered reboot + * notifier function. + * + * Returns zero on success, or %-ENOENT on failure. + */ +int unregister_reboot_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); +} +EXPORT_SYMBOL(unregister_reboot_notifier); + +static void migrate_to_reboot_cpu(void) +{ + /* The boot cpu is always logical cpu 0 */ + int cpu = reboot_cpu; + + cpu_hotplug_disable(); + + /* Make certain the cpu I'm about to reboot on is online */ + if (!cpu_online(cpu)) + cpu = cpumask_first(cpu_online_mask); + + /* Prevent races with other tasks migrating this task */ + current->flags |= PF_NO_SETAFFINITY; + + /* Make certain I only run on the appropriate processor */ + set_cpus_allowed_ptr(current, cpumask_of(cpu)); +} + +/** + * kernel_restart - reboot the system + * @cmd: pointer to buffer containing command to execute for restart + * or %NULL + * + * Shutdown everything and perform a clean reboot. + * This is not safe to call in interrupt context. + */ +void kernel_restart(char *cmd) +{ + kernel_restart_prepare(cmd); + migrate_to_reboot_cpu(); + syscore_shutdown(); + if (!cmd) + pr_emerg("Restarting system\n"); + else + pr_emerg("Restarting system with command '%s'\n", cmd); + kmsg_dump(KMSG_DUMP_RESTART); + machine_restart(cmd); +} +EXPORT_SYMBOL_GPL(kernel_restart); + +static void kernel_shutdown_prepare(enum system_states state) +{ + blocking_notifier_call_chain(&reboot_notifier_list, + (state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL); + system_state = state; + usermodehelper_disable(); + device_shutdown(); +} +/** + * kernel_halt - halt the system + * + * Shutdown everything and perform a clean system halt. + */ +void kernel_halt(void) +{ + kernel_shutdown_prepare(SYSTEM_HALT); + migrate_to_reboot_cpu(); + syscore_shutdown(); + pr_emerg("System halted\n"); + kmsg_dump(KMSG_DUMP_HALT); + machine_halt(); +} +EXPORT_SYMBOL_GPL(kernel_halt); + +/** + * kernel_power_off - power_off the system + * + * Shutdown everything and perform a clean system power_off. + */ +void kernel_power_off(void) +{ + kernel_shutdown_prepare(SYSTEM_POWER_OFF); + if (pm_power_off_prepare) + pm_power_off_prepare(); + migrate_to_reboot_cpu(); + syscore_shutdown(); + pr_emerg("Power down\n"); + kmsg_dump(KMSG_DUMP_POWEROFF); + machine_power_off(); +} +EXPORT_SYMBOL_GPL(kernel_power_off); + +static DEFINE_MUTEX(reboot_mutex); + +/* + * Reboot system call: for obvious reasons only root may call it, + * and even root needs to set up some magic numbers in the registers + * so that some mistake won't make this reboot the whole machine. + * You can also set the meaning of the ctrl-alt-del-key here. + * + * reboot doesn't sync: do that yourself before calling this. + */ +SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, + void __user *, arg) +{ + struct pid_namespace *pid_ns = task_active_pid_ns(current); + char buffer[256]; + int ret = 0; + + /* We only trust the superuser with rebooting the system. */ + if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT)) + return -EPERM; + + /* For safety, we require "magic" arguments. */ + if (magic1 != LINUX_REBOOT_MAGIC1 || + (magic2 != LINUX_REBOOT_MAGIC2 && + magic2 != LINUX_REBOOT_MAGIC2A && + magic2 != LINUX_REBOOT_MAGIC2B && + magic2 != LINUX_REBOOT_MAGIC2C)) + return -EINVAL; + + /* + * If pid namespaces are enabled and the current task is in a child + * pid_namespace, the command is handled by reboot_pid_ns() which will + * call do_exit(). + */ + ret = reboot_pid_ns(pid_ns, cmd); + if (ret) + return ret; + + /* Instead of trying to make the power_off code look like + * halt when pm_power_off is not set do it the easy way. + */ + if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) + cmd = LINUX_REBOOT_CMD_HALT; + + mutex_lock(&reboot_mutex); + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: + kernel_restart(NULL); + break; + + case LINUX_REBOOT_CMD_CAD_ON: + C_A_D = 1; + break; + + case LINUX_REBOOT_CMD_CAD_OFF: + C_A_D = 0; + break; + + case LINUX_REBOOT_CMD_HALT: + kernel_halt(); + do_exit(0); + panic("cannot halt"); + + case LINUX_REBOOT_CMD_POWER_OFF: + kernel_power_off(); + do_exit(0); + break; + + case LINUX_REBOOT_CMD_RESTART2: + ret = strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1); + if (ret < 0) { + ret = -EFAULT; + break; + } + buffer[sizeof(buffer) - 1] = '\0'; + + kernel_restart(buffer); + break; + +#ifdef CONFIG_KEXEC + case LINUX_REBOOT_CMD_KEXEC: + ret = kernel_kexec(); + break; +#endif + +#ifdef CONFIG_HIBERNATION + case LINUX_REBOOT_CMD_SW_SUSPEND: + ret = hibernate(); + break; +#endif + + default: + ret = -EINVAL; + break; + } + mutex_unlock(&reboot_mutex); + return ret; +} + +static void deferred_cad(struct work_struct *dummy) +{ + kernel_restart(NULL); +} + +/* + * This function gets called by ctrl-alt-del - ie the keyboard interrupt. + * As it's called within an interrupt, it may NOT sync: the only choice + * is whether to reboot at once, or just ignore the ctrl-alt-del. + */ +void ctrl_alt_del(void) +{ + static DECLARE_WORK(cad_work, deferred_cad); + + if (C_A_D) + schedule_work(&cad_work); + else + kill_cad_pid(SIGINT, 1); +} + +char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; + +static int __orderly_poweroff(bool force) +{ + char **argv; + static char *envp[] = { + "HOME=/", + "PATH=/sbin:/bin:/usr/sbin:/usr/bin", + NULL + }; + int ret; + + argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL); + if (argv) { + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); + argv_free(argv); + } else { + ret = -ENOMEM; + } + + if (ret && force) { + pr_warn("Failed to start orderly shutdown: forcing the issue\n"); + /* + * I guess this should try to kick off some daemon to sync and + * poweroff asap. Or not even bother syncing if we're doing an + * emergency shutdown? + */ + emergency_sync(); + kernel_power_off(); + } + + return ret; +} + +static bool poweroff_force; + +static void poweroff_work_func(struct work_struct *work) +{ + __orderly_poweroff(poweroff_force); +} + +static DECLARE_WORK(poweroff_work, poweroff_work_func); + +/** + * orderly_poweroff - Trigger an orderly system poweroff + * @force: force poweroff if command execution fails + * + * This may be called from any context to trigger a system shutdown. + * If the orderly shutdown fails, it will force an immediate shutdown. + */ +int orderly_poweroff(bool force) +{ + if (force) /* do not override the pending "true" */ + poweroff_force = true; + schedule_work(&poweroff_work); + return 0; +} +EXPORT_SYMBOL_GPL(orderly_poweroff); + +static int __init reboot_setup(char *str) +{ + for (;;) { + /* + * Having anything passed on the command line via + * reboot= will cause us to disable DMI checking + * below. + */ + reboot_default = 0; + + switch (*str) { + case 'w': + reboot_mode = REBOOT_WARM; + break; + + case 'c': + reboot_mode = REBOOT_COLD; + break; + + case 'h': + reboot_mode = REBOOT_HARD; + break; + + case 's': + if (isdigit(*(str+1))) + reboot_cpu = simple_strtoul(str+1, NULL, 0); + else if (str[1] == 'm' && str[2] == 'p' && + isdigit(*(str+3))) + reboot_cpu = simple_strtoul(str+3, NULL, 0); + else + reboot_mode = REBOOT_SOFT; + break; + + case 'g': + reboot_mode = REBOOT_GPIO; + break; + + case 'b': + case 'a': + case 'k': + case 't': + case 'e': + case 'p': + reboot_type = *str; + break; + + case 'f': + reboot_force = 1; + break; + } + + str = strchr(str, ','); + if (str) + str++; + else + break; + } + return 1; +} +__setup("reboot=", reboot_setup); diff --git a/kernel/sys.c b/kernel/sys.c index 071de90..771129b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -116,20 +116,6 @@ EXPORT_SYMBOL(fs_overflowuid); EXPORT_SYMBOL(fs_overflowgid); /* - * this indicates whether you can reboot with ctrl-alt-del: the default is yes - */ - -int C_A_D = 1; -struct pid *cad_pid; -EXPORT_SYMBOL(cad_pid); - -/* - * If set, this is used for preparing the system to power off. - */ - -void (*pm_power_off_prepare)(void); - -/* * Returns true if current's euid is same as p's uid or euid, * or has CAP_SYS_NICE to p's user_ns. * @@ -308,266 +294,6 @@ out_unlock: return retval; } -/** - * emergency_restart - reboot the system - * - * Without shutting down any hardware or taking any locks - * reboot the system. This is called when we know we are in - * trouble so this is our best effort to reboot. This is - * safe to call in interrupt context. - */ -void emergency_restart(void) -{ - kmsg_dump(KMSG_DUMP_EMERG); - machine_emergency_restart(); -} -EXPORT_SYMBOL_GPL(emergency_restart); - -void kernel_restart_prepare(char *cmd) -{ - blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); - system_state = SYSTEM_RESTART; - usermodehelper_disable(); - device_shutdown(); -} - -/** - * register_reboot_notifier - Register function to be called at reboot time - * @nb: Info about notifier function to be called - * - * Registers a function with the list of functions - * to be called at reboot time. - * - * Currently always returns zero, as blocking_notifier_chain_register() - * always returns zero. - */ -int register_reboot_notifier(struct notifier_block *nb) -{ - return blocking_notifier_chain_register(&reboot_notifier_list, nb); -} -EXPORT_SYMBOL(register_reboot_notifier); - -/** - * unregister_reboot_notifier - Unregister previously registered reboot notifier - * @nb: Hook to be unregistered - * - * Unregisters a previously registered reboot - * notifier function. - * - * Returns zero on success, or %-ENOENT on failure. - */ -int unregister_reboot_notifier(struct notifier_block *nb) -{ - return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); -} -EXPORT_SYMBOL(unregister_reboot_notifier); - -/* Add backwards compatibility for stable trees. */ -#ifndef PF_NO_SETAFFINITY -#define PF_NO_SETAFFINITY PF_THREAD_BOUND -#endif - -static void migrate_to_reboot_cpu(void) -{ - /* The boot cpu is always logical cpu 0 */ - int cpu = 0; - - cpu_hotplug_disable(); - - /* Make certain the cpu I'm about to reboot on is online */ - if (!cpu_online(cpu)) - cpu = cpumask_first(cpu_online_mask); - - /* Prevent races with other tasks migrating this task */ - current->flags |= PF_NO_SETAFFINITY; - - /* Make certain I only run on the appropriate processor */ - set_cpus_allowed_ptr(current, cpumask_of(cpu)); -} - -/** - * kernel_restart - reboot the system - * @cmd: pointer to buffer containing command to execute for restart - * or %NULL - * - * Shutdown everything and perform a clean reboot. - * This is not safe to call in interrupt context. - */ -void kernel_restart(char *cmd) -{ - kernel_restart_prepare(cmd); - migrate_to_reboot_cpu(); - syscore_shutdown(); - if (!cmd) - printk(KERN_EMERG "Restarting system.\n"); - else - printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); - kmsg_dump(KMSG_DUMP_RESTART); - machine_restart(cmd); -} -EXPORT_SYMBOL_GPL(kernel_restart); - -static void kernel_shutdown_prepare(enum system_states state) -{ - blocking_notifier_call_chain(&reboot_notifier_list, - (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); - system_state = state; - usermodehelper_disable(); - device_shutdown(); -} -/** - * kernel_halt - halt the system - * - * Shutdown everything and perform a clean system halt. - */ -void kernel_halt(void) -{ - kernel_shutdown_prepare(SYSTEM_HALT); - migrate_to_reboot_cpu(); - syscore_shutdown(); - printk(KERN_EMERG "System halted.\n"); - kmsg_dump(KMSG_DUMP_HALT); - machine_halt(); -} - -EXPORT_SYMBOL_GPL(kernel_halt); - -/** - * kernel_power_off - power_off the system - * - * Shutdown everything and perform a clean system power_off. - */ -void kernel_power_off(void) -{ - kernel_shutdown_prepare(SYSTEM_POWER_OFF); - if (pm_power_off_prepare) - pm_power_off_prepare(); - migrate_to_reboot_cpu(); - syscore_shutdown(); - printk(KERN_EMERG "Power down.\n"); - kmsg_dump(KMSG_DUMP_POWEROFF); - machine_power_off(); -} -EXPORT_SYMBOL_GPL(kernel_power_off); - -static DEFINE_MUTEX(reboot_mutex); - -/* - * Reboot system call: for obvious reasons only root may call it, - * and even root needs to set up some magic numbers in the registers - * so that some mistake won't make this reboot the whole machine. - * You can also set the meaning of the ctrl-alt-del-key here. - * - * reboot doesn't sync: do that yourself before calling this. - */ -SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, - void __user *, arg) -{ - struct pid_namespace *pid_ns = task_active_pid_ns(current); - char buffer[256]; - int ret = 0; - - /* We only trust the superuser with rebooting the system. */ - if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT)) - return -EPERM; - - /* For safety, we require "magic" arguments. */ - if (magic1 != LINUX_REBOOT_MAGIC1 || - (magic2 != LINUX_REBOOT_MAGIC2 && - magic2 != LINUX_REBOOT_MAGIC2A && - magic2 != LINUX_REBOOT_MAGIC2B && - magic2 != LINUX_REBOOT_MAGIC2C)) - return -EINVAL; - - /* - * If pid namespaces are enabled and the current task is in a child - * pid_namespace, the command is handled by reboot_pid_ns() which will - * call do_exit(). - */ - ret = reboot_pid_ns(pid_ns, cmd); - if (ret) - return ret; - - /* Instead of trying to make the power_off code look like - * halt when pm_power_off is not set do it the easy way. - */ - if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) - cmd = LINUX_REBOOT_CMD_HALT; - - mutex_lock(&reboot_mutex); - switch (cmd) { - case LINUX_REBOOT_CMD_RESTART: - kernel_restart(NULL); - break; - - case LINUX_REBOOT_CMD_CAD_ON: - C_A_D = 1; - break; - - case LINUX_REBOOT_CMD_CAD_OFF: - C_A_D = 0; - break; - - case LINUX_REBOOT_CMD_HALT: - kernel_halt(); - do_exit(0); - panic("cannot halt.\n"); - - case LINUX_REBOOT_CMD_POWER_OFF: - kernel_power_off(); - do_exit(0); - break; - - case LINUX_REBOOT_CMD_RESTART2: - if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { - ret = -EFAULT; - break; - } - buffer[sizeof(buffer) - 1] = '\0'; - - kernel_restart(buffer); - break; - -#ifdef CONFIG_KEXEC - case LINUX_REBOOT_CMD_KEXEC: - ret = kernel_kexec(); - break; -#endif - -#ifdef CONFIG_HIBERNATION - case LINUX_REBOOT_CMD_SW_SUSPEND: - ret = hibernate(); - break; -#endif - - default: - ret = -EINVAL; - break; - } - mutex_unlock(&reboot_mutex); - return ret; -} - -static void deferred_cad(struct work_struct *dummy) -{ - kernel_restart(NULL); -} - -/* - * This function gets called by ctrl-alt-del - ie the keyboard interrupt. - * As it's called within an interrupt, it may NOT sync: the only choice - * is whether to reboot at once, or just ignore the ctrl-alt-del. - */ -void ctrl_alt_del(void) -{ - static DECLARE_WORK(cad_work, deferred_cad); - - if (C_A_D) - schedule_work(&cad_work); - else - kill_cad_pid(SIGINT, 1); -} - /* * Unprivileged users may change the real gid to the effective gid * or vice versa. (BSD-style) @@ -2292,68 +2018,6 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, return err ? -EFAULT : 0; } -char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; - -static int __orderly_poweroff(bool force) -{ - char **argv; - static char *envp[] = { - "HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL - }; - int ret; - - argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL); - if (argv) { - ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); - argv_free(argv); - } else { - printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", - __func__, poweroff_cmd); - ret = -ENOMEM; - } - - if (ret && force) { - printk(KERN_WARNING "Failed to start orderly shutdown: " - "forcing the issue\n"); - /* - * I guess this should try to kick off some daemon to sync and - * poweroff asap. Or not even bother syncing if we're doing an - * emergency shutdown? - */ - emergency_sync(); - kernel_power_off(); - } - - return ret; -} - -static bool poweroff_force; - -static void poweroff_work_func(struct work_struct *work) -{ - __orderly_poweroff(poweroff_force); -} - -static DECLARE_WORK(poweroff_work, poweroff_work_func); - -/** - * orderly_poweroff - Trigger an orderly system poweroff - * @force: force poweroff if command execution fails - * - * This may be called from any context to trigger a system shutdown. - * If the orderly shutdown fails, it will force an immediate shutdown. - */ -int orderly_poweroff(bool force) -{ - if (force) /* do not override the pending "true" */ - poweroff_force = true; - schedule_work(&poweroff_work); - return 0; -} -EXPORT_SYMBOL_GPL(orderly_poweroff); - /** * do_sysinfo - fill in sysinfo struct * @info: pointer to buffer to fill diff --git a/lib/Kconfig b/lib/Kconfig index f1ed53c..35da513 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -194,6 +194,15 @@ config LZO_COMPRESS config LZO_DECOMPRESS tristate +config LZ4_COMPRESS + tristate + +config LZ4HC_COMPRESS + tristate + +config LZ4_DECOMPRESS + tristate + source "lib/xz/Kconfig" # @@ -218,6 +227,10 @@ config DECOMPRESS_LZO select LZO_DECOMPRESS tristate +config DECOMPRESS_LZ4 + select LZ4_DECOMPRESS + tristate + # # Generic allocator support is selected if needed # diff --git a/lib/Makefile b/lib/Makefile index c09e38e..7baccfd 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -23,7 +23,7 @@ lib-y += kobject.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o \ + gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o @@ -75,6 +75,9 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_BCH) += bch.o obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ +obj-$(CONFIG_LZ4_COMPRESS) += lz4/ +obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/ +obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/ obj-$(CONFIG_XZ_DEC) += xz/ obj-$(CONFIG_RAID6_PQ) += raid6/ @@ -83,6 +86,7 @@ lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o lib-$(CONFIG_DECOMPRESS_XZ) += decompress_unxz.o lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o +lib-$(CONFIG_DECOMPRESS_LZ4) += decompress_unlz4.o obj-$(CONFIG_TEXTSEARCH) += textsearch.o obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o diff --git a/lib/clz_ctz.c b/lib/clz_ctz.c new file mode 100644 index 0000000..a8f8379 --- /dev/null +++ b/lib/clz_ctz.c @@ -0,0 +1,58 @@ +/* + * lib/clz_ctz.c + * + * Copyright (C) 2013 Chanho Min <chanho.min@lge.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * __c[lt]z[sd]i2 can be overridden by linking arch-specific versions. + */ + +#include <linux/export.h> +#include <linux/kernel.h> + +int __weak __ctzsi2(int val) +{ + return __ffs(val); +} +EXPORT_SYMBOL(__ctzsi2); + +int __weak __clzsi2(int val) +{ + return 32 - fls(val); +} +EXPORT_SYMBOL(__clzsi2); + +#if BITS_PER_LONG == 32 + +int __weak __clzdi2(long val) +{ + return 32 - fls((int)val); +} +EXPORT_SYMBOL(__clzdi2); + +int __weak __ctzdi2(long val) +{ + return __ffs((u32)val); +} +EXPORT_SYMBOL(__ctzdi2); + +#elif BITS_PER_LONG == 64 + +int __weak __clzdi2(long val) +{ + return 64 - fls64((u64)val); +} +EXPORT_SYMBOL(__clzdi2); + +int __weak __ctzdi2(long val) +{ + return __ffs64((u64)val); +} +EXPORT_SYMBOL(__ctzdi2); + +#else +#error BITS_PER_LONG not 32 or 64 +#endif diff --git a/lib/decompress.c b/lib/decompress.c index f8fdeda..4d1cd03 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -11,6 +11,7 @@ #include <linux/decompress/unxz.h> #include <linux/decompress/inflate.h> #include <linux/decompress/unlzo.h> +#include <linux/decompress/unlz4.h> #include <linux/types.h> #include <linux/string.h> @@ -31,6 +32,9 @@ #ifndef CONFIG_DECOMPRESS_LZO # define unlzo NULL #endif +#ifndef CONFIG_DECOMPRESS_LZ4 +# define unlz4 NULL +#endif struct compress_format { unsigned char magic[2]; @@ -45,6 +49,7 @@ static const struct compress_format compressed_formats[] __initconst = { { {0x5d, 0x00}, "lzma", unlzma }, { {0xfd, 0x37}, "xz", unxz }, { {0x89, 0x4c}, "lzo", unlzo }, + { {0x02, 0x21}, "lz4", unlz4 }, { {0, 0}, NULL, NULL } }; diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c new file mode 100644 index 0000000..3e67cfad1 --- /dev/null +++ b/lib/decompress_unlz4.c @@ -0,0 +1,187 @@ +/* + * Wrapper for decompressing LZ4-compressed kernel, initramfs, and initrd + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef STATIC +#define PREBOOT +#include "lz4/lz4_decompress.c" +#else +#include <linux/decompress/unlz4.h> +#endif +#include <linux/types.h> +#include <linux/lz4.h> +#include <linux/decompress/mm.h> +#include <linux/compiler.h> + +#include <asm/unaligned.h> + +/* + * Note: Uncompressed chunk size is used in the compressor side + * (userspace side for compression). + * It is hardcoded because there is not proper way to extract it + * from the binary stream which is generated by the preliminary + * version of LZ4 tool so far. + */ +#define LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE (8 << 20) +#define ARCHIVE_MAGICNUMBER 0x184C2102 + +STATIC inline int INIT unlz4(u8 *input, int in_len, + int (*fill) (void *, unsigned int), + int (*flush) (void *, unsigned int), + u8 *output, int *posp, + void (*error) (char *x)) +{ + int ret = -1; + size_t chunksize = 0; + size_t uncomp_chunksize = LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE; + u8 *inp; + u8 *inp_start; + u8 *outp; + int size = in_len; +#ifdef PREBOOT + size_t out_len = get_unaligned_le32(input + in_len); +#endif + size_t dest_len; + + + if (output) { + outp = output; + } else if (!flush) { + error("NULL output pointer and no flush function provided"); + goto exit_0; + } else { + outp = large_malloc(uncomp_chunksize); + if (!outp) { + error("Could not allocate output buffer"); + goto exit_0; + } + } + + if (input && fill) { + error("Both input pointer and fill function provided,"); + goto exit_1; + } else if (input) { + inp = input; + } else if (!fill) { + error("NULL input pointer and missing fill function"); + goto exit_1; + } else { + inp = large_malloc(lz4_compressbound(uncomp_chunksize)); + if (!inp) { + error("Could not allocate input buffer"); + goto exit_1; + } + } + inp_start = inp; + + if (posp) + *posp = 0; + + if (fill) + fill(inp, 4); + + chunksize = get_unaligned_le32(inp); + if (chunksize == ARCHIVE_MAGICNUMBER) { + inp += 4; + size -= 4; + } else { + error("invalid header"); + goto exit_2; + } + + if (posp) + *posp += 4; + + for (;;) { + + if (fill) + fill(inp, 4); + + chunksize = get_unaligned_le32(inp); + if (chunksize == ARCHIVE_MAGICNUMBER) { + inp += 4; + size -= 4; + if (posp) + *posp += 4; + continue; + } + inp += 4; + size -= 4; + + if (posp) + *posp += 4; + + if (fill) { + if (chunksize > lz4_compressbound(uncomp_chunksize)) { + error("chunk length is longer than allocated"); + goto exit_2; + } + fill(inp, chunksize); + } +#ifdef PREBOOT + if (out_len >= uncomp_chunksize) { + dest_len = uncomp_chunksize; + out_len -= dest_len; + } else + dest_len = out_len; + ret = lz4_decompress(inp, &chunksize, outp, dest_len); +#else + dest_len = uncomp_chunksize; + ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp, + &dest_len); +#endif + if (ret < 0) { + error("Decoding failed"); + goto exit_2; + } + + if (flush && flush(outp, dest_len) != dest_len) + goto exit_2; + if (output) + outp += dest_len; + if (posp) + *posp += chunksize; + + size -= chunksize; + + if (size == 0) + break; + else if (size < 0) { + error("data corrupted"); + goto exit_2; + } + + inp += chunksize; + if (fill) + inp = inp_start; + } + + ret = 0; +exit_2: + if (!input) + large_free(inp_start); +exit_1: + if (!output) + large_free(outp); +exit_0: + return ret; +} + +#ifdef PREBOOT +STATIC int INIT decompress(unsigned char *buf, int in_len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *posp, + void(*error)(char *x) + ) +{ + return unlz4(buf, in_len - 4, fill, flush, output, posp, error); +} +#endif diff --git a/lib/lz4/Makefile b/lib/lz4/Makefile new file mode 100644 index 0000000..8085d04 --- /dev/null +++ b/lib/lz4/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_LZ4_COMPRESS) += lz4_compress.o +obj-$(CONFIG_LZ4HC_COMPRESS) += lz4hc_compress.o +obj-$(CONFIG_LZ4_DECOMPRESS) += lz4_decompress.o diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c new file mode 100644 index 0000000..fd94058 --- /dev/null +++ b/lib/lz4/lz4_compress.c @@ -0,0 +1,443 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + * + * Changed for kernel use by: + * Chanho Min <chanho.min@lge.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/lz4.h> +#include <asm/unaligned.h> +#include "lz4defs.h" + +/* + * LZ4_compressCtx : + * ----------------- + * Compress 'isize' bytes from 'source' into an output buffer 'dest' of + * maximum size 'maxOutputSize'. * If it cannot achieve it, compression + * will stop, and result of the function will be zero. + * return : the number of bytes written in buffer 'dest', or 0 if the + * compression fails + */ +static inline int lz4_compressctx(void *ctx, + const char *source, + char *dest, + int isize, + int maxoutputsize) +{ + HTYPE *hashtable = (HTYPE *)ctx; + const u8 *ip = (u8 *)source; +#if LZ4_ARCH64 + const BYTE * const base = ip; +#else + const int base = 0; +#endif + const u8 *anchor = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + #define MATCHLIMIT (iend - LASTLITERALS) + + u8 *op = (u8 *) dest; + u8 *const oend = op + maxoutputsize; + int length; + const int skipstrength = SKIPSTRENGTH; + u32 forwardh; + int lastrun; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + + /* First Byte */ + hashtable[LZ4_HASH_VALUE(ip)] = ip - base; + ip++; + forwardh = LZ4_HASH_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findmatchattempts = (1U << skipstrength) + 3; + const u8 *forwardip = ip; + const u8 *ref; + u8 *token; + + /* Find a match */ + do { + u32 h = forwardh; + int step = findmatchattempts++ >> skipstrength; + ip = forwardip; + forwardip = ip + step; + + if (unlikely(forwardip > mflimit)) + goto _last_literals; + + forwardh = LZ4_HASH_VALUE(forwardip); + ref = base + hashtable[h]; + hashtable[h] = ip - base; + } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); + + /* Catch up */ + while ((ip > anchor) && (ref > (u8 *)source) && + unlikely(ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + /* check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return 0; + + if (length >= (int)RUN_MASK) { + int len; + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *op++ = 255; + *op++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); +_next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + + /* Start Counting */ + ip += MINMATCH; + /* MinMatch verified */ + ref += MINMATCH; + anchor = ip; + while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) { + #if LZ4_ARCH64 + u64 diff = A64(ref) ^ A64(ip); + #else + u32 diff = A32(ref) ^ A32(ip); + #endif + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } + #endif + if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < MATCHLIMIT) && (*ref == *ip)) + ip++; +_endcount: + /* Encode MatchLength */ + length = (int)(ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend)) + return 0; + if (length >= (int)ML_MASK) { + *token += ML_MASK; + length -= ML_MASK; + for (; length > 509 ; length -= 510) { + *op++ = 255; + *op++ = 255; + } + if (length > 254) { + length -= 255; + *op++ = 255; + } + *op++ = (u8)length; + } else + *token += length; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + + /* Fill table */ + hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base; + + /* Test next position */ + ref = base + hashtable[LZ4_HASH_VALUE(ip)]; + hashtable[LZ4_HASH_VALUE(ip)] = ip - base; + if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { + token = op++; + *token = 0; + goto _next_match; + } + + /* Prepare next loop */ + anchor = ip++; + forwardh = LZ4_HASH_VALUE(ip); + } + +_last_literals: + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (((char *)op - dest) + lastrun + 1 + + ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize) + return 0; + + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8)lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + + /* End */ + return (int)(((char *)op) - dest); +} + +static inline int lz4_compress64kctx(void *ctx, + const char *source, + char *dest, + int isize, + int maxoutputsize) +{ + u16 *hashtable = (u16 *)ctx; + const u8 *ip = (u8 *) source; + const u8 *anchor = ip; + const u8 *const base = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + #define MATCHLIMIT (iend - LASTLITERALS) + + u8 *op = (u8 *) dest; + u8 *const oend = op + maxoutputsize; + int len, length; + const int skipstrength = SKIPSTRENGTH; + u32 forwardh; + int lastrun; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + + /* First Byte */ + ip++; + forwardh = LZ4_HASH64K_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findmatchattempts = (1U << skipstrength) + 3; + const u8 *forwardip = ip; + const u8 *ref; + u8 *token; + + /* Find a match */ + do { + u32 h = forwardh; + int step = findmatchattempts++ >> skipstrength; + ip = forwardip; + forwardip = ip + step; + + if (forwardip > mflimit) + goto _last_literals; + + forwardh = LZ4_HASH64K_VALUE(forwardip); + ref = base + hashtable[h]; + hashtable[h] = (u16)(ip - base); + } while (A32(ref) != A32(ip)); + + /* Catch up */ + while ((ip > anchor) && (ref > (u8 *)source) + && (ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + /* Check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return 0; + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *op++ = 255; + *op++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); + +_next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + + /* Start Counting */ + ip += MINMATCH; + /* MinMatch verified */ + ref += MINMATCH; + anchor = ip; + + while (ip < MATCHLIMIT - (STEPSIZE - 1)) { + #if LZ4_ARCH64 + u64 diff = A64(ref) ^ A64(ip); + #else + u32 diff = A32(ref) ^ A32(ip); + #endif + + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } + #endif + if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < MATCHLIMIT) && (*ref == *ip)) + ip++; +_endcount: + + /* Encode MatchLength */ + len = (int)(ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) + return 0; + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509 ; len -= 510) { + *op++ = 255; + *op++ = 255; + } + if (len > 254) { + len -= 255; + *op++ = 255; + } + *op++ = (u8)len; + } else + *token += len; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + + /* Fill table */ + hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base); + + /* Test next position */ + ref = base + hashtable[LZ4_HASH64K_VALUE(ip)]; + hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base); + if (A32(ref) == A32(ip)) { + token = op++; + *token = 0; + goto _next_match; + } + + /* Prepare next loop */ + anchor = ip++; + forwardh = LZ4_HASH64K_VALUE(ip); + } + +_last_literals: + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend) + return 0; + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8)lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + /* End */ + return (int)(((char *)op) - dest); +} + +int lz4_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem) +{ + int ret = -1; + int out_len = 0; + + if (src_len < LZ4_64KLIMIT) + out_len = lz4_compress64kctx(wrkmem, src, dst, src_len, + lz4_compressbound(src_len)); + else + out_len = lz4_compressctx(wrkmem, src, dst, src_len, + lz4_compressbound(src_len)); + + if (out_len < 0) + goto exit; + + *dst_len = out_len; + + return 0; +exit: + return ret; +} +EXPORT_SYMBOL_GPL(lz4_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 compressor"); diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c new file mode 100644 index 0000000..d3414ea --- /dev/null +++ b/lib/lz4/lz4_decompress.c @@ -0,0 +1,326 @@ +/* + * LZ4 Decompressor for Linux kernel + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com> + * + * Based on LZ4 implementation by Yann Collet. + * + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + */ + +#ifndef STATIC +#include <linux/module.h> +#include <linux/kernel.h> +#endif +#include <linux/lz4.h> + +#include <asm/unaligned.h> + +#include "lz4defs.h" + +static int lz4_uncompress(const char *source, char *dest, int osize) +{ + const BYTE *ip = (const BYTE *) source; + const BYTE *ref; + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + osize; + BYTE *cpy; + unsigned token; + size_t length; + size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 + size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + + while (1) { + + /* get runlength */ + token = *ip++; + length = (token >> ML_BITS); + if (length == RUN_MASK) { + size_t len; + + len = *ip++; + for (; len == 255; length += 255) + len = *ip++; + length += len; + } + + /* copy literals */ + cpy = op + length; + if (unlikely(cpy > oend - COPYLENGTH)) { + /* + * Error: not enough place for another match + * (min 4) + 5 literals + */ + if (cpy != oend) + goto _output_error; + + memcpy(op, ip, length); + ip += length; + break; /* EOF */ + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + + /* Error: offset create reference outside destination buffer */ + if (unlikely(ref < (BYTE *const) dest)) + goto _output_error; + + /* get matchlength */ + length = token & ML_MASK; + if (length == ML_MASK) { + for (; *ip == 255; length += 255) + ip++; + length += *ip++; + } + + /* copy repeated sequence */ + if (unlikely((op - ref) < STEPSIZE)) { +#if LZ4_ARCH64 + size_t dec64 = dec64table[op - ref]; +#else + const int dec64 = 0; +#endif + op[0] = ref[0]; + op[1] = ref[1]; + op[2] = ref[2]; + op[3] = ref[3]; + op += 4; + ref += 4; + ref -= dec32table[op-ref]; + PUT4(ref, op); + op += STEPSIZE - 4; + ref -= dec64; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE - 4); + if (cpy > (oend - COPYLENGTH)) { + + /* Error: request to write beyond destination buffer */ + if (cpy > oend) + goto _output_error; + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + /* + * Check EOF (should never happen, since last 5 bytes + * are supposed to be literals) + */ + if (op == oend) + goto _output_error; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + /* end of decoding */ + return (int) (((char *)ip) - source); + + /* write overflow error detected */ +_output_error: + return (int) (-(((char *)ip) - source)); +} + +static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, + int isize, size_t maxoutputsize) +{ + const BYTE *ip = (const BYTE *) source; + const BYTE *const iend = ip + isize; + const BYTE *ref; + + + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + maxoutputsize; + BYTE *cpy; + + size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 + size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + + /* Main Loop */ + while (ip < iend) { + + unsigned token; + size_t length; + + /* get runlength */ + token = *ip++; + length = (token >> ML_BITS); + if (length == RUN_MASK) { + int s = 255; + while ((ip < iend) && (s == 255)) { + s = *ip++; + length += s; + } + } + /* copy literals */ + cpy = op + length; + if ((cpy > oend - COPYLENGTH) || + (ip + length > iend - COPYLENGTH)) { + + if (cpy > oend) + goto _output_error;/* writes beyond buffer */ + + if (ip + length != iend) + goto _output_error;/* + * Error: LZ4 format requires + * to consume all input + * at this stage + */ + memcpy(op, ip, length); + op += length; + break;/* Necessarily EOF, due to parsing restrictions */ + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + if (ref < (BYTE * const) dest) + goto _output_error; + /* + * Error : offset creates reference + * outside of destination buffer + */ + + /* get matchlength */ + length = (token & ML_MASK); + if (length == ML_MASK) { + while (ip < iend) { + int s = *ip++; + length += s; + if (s == 255) + continue; + break; + } + } + + /* copy repeated sequence */ + if (unlikely((op - ref) < STEPSIZE)) { +#if LZ4_ARCH64 + size_t dec64 = dec64table[op - ref]; +#else + const int dec64 = 0; +#endif + op[0] = ref[0]; + op[1] = ref[1]; + op[2] = ref[2]; + op[3] = ref[3]; + op += 4; + ref += 4; + ref -= dec32table[op - ref]; + PUT4(ref, op); + op += STEPSIZE - 4; + ref -= dec64; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE-4); + if (cpy > oend - COPYLENGTH) { + if (cpy > oend) + goto _output_error; /* write outside of buf */ + + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + /* + * Check EOF (should never happen, since last 5 bytes + * are supposed to be literals) + */ + if (op == oend) + goto _output_error; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + /* end of decoding */ + return (int) (((char *) op) - dest); + + /* write overflow error detected */ +_output_error: + return (int) (-(((char *) ip) - source)); +} + +int lz4_decompress(const char *src, size_t *src_len, char *dest, + size_t actual_dest_len) +{ + int ret = -1; + int input_len = 0; + + input_len = lz4_uncompress(src, dest, actual_dest_len); + if (input_len < 0) + goto exit_0; + *src_len = input_len; + + return 0; +exit_0: + return ret; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lz4_decompress); +#endif + +int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, + char *dest, size_t *dest_len) +{ + int ret = -1; + int out_len = 0; + + out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len, + *dest_len); + if (out_len < 0) + goto exit_0; + *dest_len = out_len; + + return 0; +exit_0: + return ret; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lz4_decompress_unknownoutputsize); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 Decompressor"); +#endif diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h new file mode 100644 index 0000000..abcecdc --- /dev/null +++ b/lib/lz4/lz4defs.h @@ -0,0 +1,156 @@ +/* + * lz4defs.h -- architecture specific defines + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Detects 64 bits mode + */ +#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) \ + || defined(__ppc64__) || defined(__LP64__)) +#define LZ4_ARCH64 1 +#else +#define LZ4_ARCH64 0 +#endif + +/* + * Architecture-specific macros + */ +#define BYTE u8 +typedef struct _U16_S { u16 v; } U16_S; +typedef struct _U32_S { u32 v; } U32_S; +typedef struct _U64_S { u64 v; } U64_S; +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \ + || defined(CONFIG_ARM) && __LINUX_ARM_ARCH__ >= 6 \ + && defined(ARM_EFFICIENT_UNALIGNED_ACCESS) + +#define A16(x) (((U16_S *)(x))->v) +#define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) + +#define PUT4(s, d) (A32(d) = A32(s)) +#define PUT8(s, d) (A64(d) = A64(s)) +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + A16(p) = v; \ + p += 2; \ + } while (0) +#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + +#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v)) +#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v)) +#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v)) + +#define PUT4(s, d) \ + put_unaligned(get_unaligned((const u32 *) s), (u32 *) d) +#define PUT8(s, d) \ + put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) + +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + put_unaligned(v, (u16 *)(p)); \ + p += 2; \ + } while (0) +#endif + +#define COPYLENGTH 8 +#define ML_BITS 4 +#define ML_MASK ((1U << ML_BITS) - 1) +#define RUN_BITS (8 - ML_BITS) +#define RUN_MASK ((1U << RUN_BITS) - 1) +#define MEMORY_USAGE 14 +#define MINMATCH 4 +#define SKIPSTRENGTH 6 +#define LASTLITERALS 5 +#define MFLIMIT (COPYLENGTH + MINMATCH) +#define MINLENGTH (MFLIMIT + 1) +#define MAXD_LOG 16 +#define MAXD (1 << MAXD_LOG) +#define MAXD_MASK (u32)(MAXD - 1) +#define MAX_DISTANCE (MAXD - 1) +#define HASH_LOG (MAXD_LOG - 1) +#define HASHTABLESIZE (1 << HASH_LOG) +#define MAX_NB_ATTEMPTS 256 +#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) +#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1)) +#define HASHLOG64K ((MEMORY_USAGE - 2) + 1) +#define HASH64KTABLESIZE (1U << HASHLOG64K) +#define LZ4_HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - (MEMORY_USAGE-2))) +#define LZ4_HASH64K_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - HASHLOG64K)) +#define HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - HASH_LOG)) + +#if LZ4_ARCH64/* 64-bit */ +#define STEPSIZE 8 + +#define LZ4_COPYSTEP(s, d) \ + do { \ + PUT8(s, d); \ + d += 8; \ + s += 8; \ + } while (0) + +#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) + +#define LZ4_SECURECOPY(s, d, e) \ + do { \ + if (d < e) { \ + LZ4_WILDCOPY(s, d, e); \ + } \ + } while (0) +#define HTYPE u32 + +#ifdef __BIG_ENDIAN +#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3) +#else +#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3) +#endif + +#else /* 32-bit */ +#define STEPSIZE 4 + +#define LZ4_COPYSTEP(s, d) \ + do { \ + PUT4(s, d); \ + d += 4; \ + s += 4; \ + } while (0) + +#define LZ4_COPYPACKET(s, d) \ + do { \ + LZ4_COPYSTEP(s, d); \ + LZ4_COPYSTEP(s, d); \ + } while (0) + +#define LZ4_SECURECOPY LZ4_WILDCOPY +#define HTYPE const u8* + +#ifdef __BIG_ENDIAN +#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3) +#else +#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3) +#endif + +#endif + +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + (d = s - get_unaligned_le16(p)) + +#define LZ4_WILDCOPY(s, d, e) \ + do { \ + LZ4_COPYPACKET(s, d); \ + } while (d < e) + +#define LZ4_BLINDCOPY(s, d, l) \ + do { \ + u8 *e = (d) + l; \ + LZ4_WILDCOPY(s, d, e); \ + d = e; \ + } while (0) diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c new file mode 100644 index 0000000..eb1a74f --- /dev/null +++ b/lib/lz4/lz4hc_compress.c @@ -0,0 +1,539 @@ +/* + * LZ4 HC - High Compression Mode of LZ4 + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + * + * Changed for kernel use by: + * Chanho Min <chanho.min@lge.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/lz4.h> +#include <asm/unaligned.h> +#include "lz4defs.h" + +struct lz4hc_data { + const u8 *base; + HTYPE hashtable[HASHTABLESIZE]; + u16 chaintable[MAXD]; + const u8 *nexttoupdate; +} __attribute__((__packed__)); + +static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base) +{ + memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable)); + memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable)); + +#if LZ4_ARCH64 + hc4->nexttoupdate = base + 1; +#else + hc4->nexttoupdate = base; +#endif + hc4->base = base; + return 1; +} + +/* Update chains up to ip (excluded) */ +static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip) +{ + u16 *chaintable = hc4->chaintable; + HTYPE *hashtable = hc4->hashtable; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + + while (hc4->nexttoupdate < ip) { + const u8 *p = hc4->nexttoupdate; + size_t delta = p - (hashtable[HASH_VALUE(p)] + base); + if (delta > MAX_DISTANCE) + delta = MAX_DISTANCE; + chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta; + hashtable[HASH_VALUE(p)] = (p) - base; + hc4->nexttoupdate++; + } +} + +static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2, + const u8 *const matchlimit) +{ + const u8 *p1t = p1; + + while (p1t < matchlimit - (STEPSIZE - 1)) { +#if LZ4_ARCH64 + u64 diff = A64(p2) ^ A64(p1t); +#else + u32 diff = A32(p2) ^ A32(p1t); +#endif + if (!diff) { + p1t += STEPSIZE; + p2 += STEPSIZE; + continue; + } + p1t += LZ4_NBCOMMONBYTES(diff); + return p1t - p1; + } +#if LZ4_ARCH64 + if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) { + p1t += 4; + p2 += 4; + } +#endif + + if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) { + p1t += 2; + p2 += 2; + } + if ((p1t < matchlimit) && (*p2 == *p1t)) + p1t++; + return p1t - p1; +} + +static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, + const u8 *ip, const u8 *const matchlimit, const u8 **matchpos) +{ + u16 *const chaintable = hc4->chaintable; + HTYPE *const hashtable = hc4->hashtable; + const u8 *ref; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + int nbattempts = MAX_NB_ATTEMPTS; + size_t repl = 0, ml = 0; + u16 delta; + + /* HC4 match finder */ + lz4hc_insert(hc4, ip); + ref = hashtable[HASH_VALUE(ip)] + base; + + /* potential repetition */ + if (ref >= ip-4) { + /* confirmed */ + if (A32(ref) == A32(ip)) { + delta = (u16)(ip-ref); + repl = ml = lz4hc_commonlength(ip + MINMATCH, + ref + MINMATCH, matchlimit) + MINMATCH; + *matchpos = ref; + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + + while ((ref >= ip - MAX_DISTANCE) && nbattempts) { + nbattempts--; + if (*(ref + ml) == *(ip + ml)) { + if (A32(ref) == A32(ip)) { + size_t mlt = + lz4hc_commonlength(ip + MINMATCH, + ref + MINMATCH, matchlimit) + MINMATCH; + if (mlt > ml) { + ml = mlt; + *matchpos = ref; + } + } + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + + /* Complete table */ + if (repl) { + const BYTE *ptr = ip; + const BYTE *end; + end = ip + repl - (MINMATCH-1); + /* Pre-Load */ + while (ptr < end - delta) { + chaintable[(size_t)(ptr) & MAXD_MASK] = delta; + ptr++; + } + do { + chaintable[(size_t)(ptr) & MAXD_MASK] = delta; + /* Head of chain */ + hashtable[HASH_VALUE(ptr)] = (ptr) - base; + ptr++; + } while (ptr < end); + hc4->nexttoupdate = end; + } + + return (int)ml; +} + +static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4, + const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest, + const u8 **matchpos, const u8 **startpos) +{ + u16 *const chaintable = hc4->chaintable; + HTYPE *const hashtable = hc4->hashtable; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + const u8 *ref; + int nbattempts = MAX_NB_ATTEMPTS; + int delta = (int)(ip - startlimit); + + /* First Match */ + lz4hc_insert(hc4, ip); + ref = hashtable[HASH_VALUE(ip)] + base; + + while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base) + && (nbattempts)) { + nbattempts--; + if (*(startlimit + longest) == *(ref - delta + longest)) { + if (A32(ref) == A32(ip)) { + const u8 *reft = ref + MINMATCH; + const u8 *ipt = ip + MINMATCH; + const u8 *startt = ip; + + while (ipt < matchlimit-(STEPSIZE - 1)) { + #if LZ4_ARCH64 + u64 diff = A64(reft) ^ A64(ipt); + #else + u32 diff = A32(reft) ^ A32(ipt); + #endif + + if (!diff) { + ipt += STEPSIZE; + reft += STEPSIZE; + continue; + } + ipt += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ipt < (matchlimit - 3)) + && (A32(reft) == A32(ipt))) { + ipt += 4; + reft += 4; + } + ipt += 2; + #endif + if ((ipt < (matchlimit - 1)) + && (A16(reft) == A16(ipt))) { + reft += 2; + } + if ((ipt < matchlimit) && (*reft == *ipt)) + ipt++; +_endcount: + reft = ref; + + while ((startt > startlimit) + && (reft > hc4->base) + && (startt[-1] == reft[-1])) { + startt--; + reft--; + } + + if ((ipt - startt) > longest) { + longest = (int)(ipt - startt); + *matchpos = reft; + *startpos = startt; + } + } + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + return longest; +} + +static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor, + int ml, const u8 *ref) +{ + int length, len; + u8 *token; + + /* Encode Literal length */ + length = (int)(*ip - *anchor); + token = (*op)++; + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *(*op)++ = 255; + *(*op)++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(*anchor, *op, length); + + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref)); + + /* Encode MatchLength */ + len = (int)(ml - MINMATCH); + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509 ; len -= 510) { + *(*op)++ = 255; + *(*op)++ = 255; + } + if (len > 254) { + len -= 255; + *(*op)++ = 255; + } + *(*op)++ = (u8)len; + } else + *token += len; + + /* Prepare next loop */ + *ip += ml; + *anchor = *ip; + + return 0; +} + +static int lz4_compresshcctx(struct lz4hc_data *ctx, + const char *source, + char *dest, + int isize) +{ + const u8 *ip = (const u8 *)source; + const u8 *anchor = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + const u8 *const matchlimit = (iend - LASTLITERALS); + + u8 *op = (u8 *)dest; + + int ml, ml2, ml3, ml0; + const u8 *ref = NULL; + const u8 *start2 = NULL; + const u8 *ref2 = NULL; + const u8 *start3 = NULL; + const u8 *ref3 = NULL; + const u8 *start0; + const u8 *ref0; + int lastrun; + + ip++; + + /* Main Loop */ + while (ip < mflimit) { + ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref)); + if (!ml) { + ip++; + continue; + } + + /* saved, in case we would skip too much */ + start0 = ip; + ref0 = ref; + ml0 = ml; +_search2: + if (ip+ml < mflimit) + ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2, + ip + 1, matchlimit, ml, &ref2, &start2); + else + ml2 = ml; + /* No better match */ + if (ml2 == ml) { + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + continue; + } + + if (start0 < ip) { + /* empirical */ + if (start2 < ip + ml0) { + ip = start0; + ref = ref0; + ml = ml0; + } + } + /* + * Here, start0==ip + * First Match too small : removed + */ + if ((start2 - ip) < 3) { + ml = ml2; + ip = start2; + ref = ref2; + goto _search2; + } + +_search3: + /* + * Currently we have : + * ml2 > ml1, and + * ip1+3 <= ip2 (usually < ip1+ml1) + */ + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + int new_ml = ml; + if (new_ml > OPTIMAL_ML) + new_ml = OPTIMAL_ML; + if (ip + new_ml > start2 + ml2 - MINMATCH) + new_ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = new_ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + /* + * Now, we have start2 = ip+new_ml, + * with new_ml=min(ml, OPTIMAL_ML=18) + */ + if (start2 + ml2 < mflimit) + ml3 = lz4hc_insertandgetwidermatch(ctx, + start2 + ml2 - 3, start2, matchlimit, + ml2, &ref3, &start3); + else + ml3 = ml2; + + /* No better match : 2 sequences to encode */ + if (ml3 == ml2) { + /* ip & ref are known; Now for ml */ + if (start2 < ip+ml) + ml = (int)(start2 - ip); + + /* Now, encode 2 sequences */ + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + ip = start2; + lz4_encodesequence(&ip, &op, &anchor, ml2, ref2); + continue; + } + + /* Not enough space for match 2 : remove it */ + if (start3 < ip + ml + 3) { + /* + * can write Seq1 immediately ==> Seq2 is removed, + * so Seq3 becomes Seq1 + */ + if (start3 >= (ip + ml)) { + if (start2 < ip + ml) { + int correction = + (int)(ip + ml - start2); + start2 += correction; + ref2 += correction; + ml2 -= correction; + if (ml2 < MINMATCH) { + start2 = start3; + ref2 = ref3; + ml2 = ml3; + } + } + + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + ip = start3; + ref = ref3; + ml = ml3; + + start0 = start2; + ref0 = ref2; + ml0 = ml2; + goto _search2; + } + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + goto _search3; + } + + /* + * OK, now we have 3 ascending matches; let's write at least + * the first one ip & ref are known; Now for ml + */ + if (start2 < ip + ml) { + if ((start2 - ip) < (int)ML_MASK) { + int correction; + if (ml > OPTIMAL_ML) + ml = OPTIMAL_ML; + if (ip + ml > start2 + ml2 - MINMATCH) + ml = (int)(start2 - ip) + ml2 + - MINMATCH; + correction = ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } else + ml = (int)(start2 - ip); + } + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + + ip = start2; + ref = ref2; + ml = ml2; + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + + goto _search3; + } + + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8) lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + /* End */ + return (int) (((char *)op) - dest); +} + +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem) +{ + int ret = -1; + int out_len = 0; + + struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem; + lz4hc_init(hc4, (const u8 *)src); + out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src, + (char *)dst, (int)src_len); + + if (out_len < 0) + goto exit; + + *dst_len = out_len; + return 0; + +exit: + return ret; +} +EXPORT_SYMBOL_GPL(lz4hc_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4HC compressor"); diff --git a/lib/scatterlist.c b/lib/scatterlist.c index a1cf8ca..a685c8a 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -247,13 +247,15 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, struct scatterlist *sg, *prv; unsigned int left; + memset(table, 0, sizeof(*table)); + + if (nents == 0) + return -EINVAL; #ifndef ARCH_HAS_SG_CHAIN if (WARN_ON_ONCE(nents > max_ents)) return -EINVAL; #endif - memset(table, 0, sizeof(*table)); - left = nents; prv = NULL; do { @@ -453,6 +455,65 @@ void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl, } EXPORT_SYMBOL(sg_miter_start); +static bool sg_miter_get_next_page(struct sg_mapping_iter *miter) +{ + if (!miter->__remaining) { + struct scatterlist *sg; + unsigned long pgoffset; + + if (!__sg_page_iter_next(&miter->piter)) + return false; + + sg = miter->piter.sg; + pgoffset = miter->piter.sg_pgoffset; + + miter->__offset = pgoffset ? 0 : sg->offset; + miter->__remaining = sg->offset + sg->length - + (pgoffset << PAGE_SHIFT) - miter->__offset; + miter->__remaining = min_t(unsigned long, miter->__remaining, + PAGE_SIZE - miter->__offset); + } + + return true; +} + +/** + * sg_miter_skip - reposition mapping iterator + * @miter: sg mapping iter to be skipped + * @offset: number of bytes to plus the current location + * + * Description: + * Sets the offset of @miter to its current location plus @offset bytes. + * If mapping iterator @miter has been proceeded by sg_miter_next(), this + * stops @miter. + * + * Context: + * Don't care if @miter is stopped, or not proceeded yet. + * Otherwise, preemption disabled if the SG_MITER_ATOMIC is set. + * + * Returns: + * true if @miter contains the valid mapping. false if end of sg + * list is reached. + */ +static bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset) +{ + sg_miter_stop(miter); + + while (offset) { + off_t consumed; + + if (!sg_miter_get_next_page(miter)) + return false; + + consumed = min_t(off_t, offset, miter->__remaining); + miter->__offset += consumed; + miter->__remaining -= consumed; + offset -= consumed; + } + + return true; +} + /** * sg_miter_next - proceed mapping iterator to the next mapping * @miter: sg mapping iter to proceed @@ -478,22 +539,9 @@ bool sg_miter_next(struct sg_mapping_iter *miter) * Get to the next page if necessary. * __remaining, __offset is adjusted by sg_miter_stop */ - if (!miter->__remaining) { - struct scatterlist *sg; - unsigned long pgoffset; - - if (!__sg_page_iter_next(&miter->piter)) - return false; - - sg = miter->piter.sg; - pgoffset = miter->piter.sg_pgoffset; + if (!sg_miter_get_next_page(miter)) + return false; - miter->__offset = pgoffset ? 0 : sg->offset; - miter->__remaining = sg->offset + sg->length - - (pgoffset << PAGE_SHIFT) - miter->__offset; - miter->__remaining = min_t(unsigned long, miter->__remaining, - PAGE_SIZE - miter->__offset); - } miter->page = sg_page_iter_page(&miter->piter); miter->consumed = miter->length = miter->__remaining; @@ -552,14 +600,16 @@ EXPORT_SYMBOL(sg_miter_stop); * @nents: Number of SG entries * @buf: Where to copy from * @buflen: The number of bytes to copy - * @to_buffer: transfer direction (non zero == from an sg list to a - * buffer, 0 == from a buffer to an sg list + * @skip: Number of bytes to skip before copying + * @to_buffer: transfer direction (true == from an sg list to a + * buffer, false == from a buffer to an sg list * * Returns the number of copied bytes. * **/ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, - void *buf, size_t buflen, int to_buffer) + void *buf, size_t buflen, off_t skip, + bool to_buffer) { unsigned int offset = 0; struct sg_mapping_iter miter; @@ -573,6 +623,9 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, sg_miter_start(&miter, sgl, nents, sg_flags); + if (!sg_miter_skip(&miter, skip)) + return false; + local_irq_save(flags); while (sg_miter_next(&miter) && offset < buflen) { @@ -607,7 +660,7 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen) { - return sg_copy_buffer(sgl, nents, buf, buflen, 0); + return sg_copy_buffer(sgl, nents, buf, buflen, 0, false); } EXPORT_SYMBOL(sg_copy_from_buffer); @@ -624,6 +677,42 @@ EXPORT_SYMBOL(sg_copy_from_buffer); size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen) { - return sg_copy_buffer(sgl, nents, buf, buflen, 1); + return sg_copy_buffer(sgl, nents, buf, buflen, 0, true); } EXPORT_SYMBOL(sg_copy_to_buffer); + +/** + * sg_pcopy_from_buffer - Copy from a linear buffer to an SG list + * @sgl: The SG list + * @nents: Number of SG entries + * @buf: Where to copy from + * @skip: Number of bytes to skip before copying + * @buflen: The number of bytes to copy + * + * Returns the number of copied bytes. + * + **/ +size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents, + void *buf, size_t buflen, off_t skip) +{ + return sg_copy_buffer(sgl, nents, buf, buflen, skip, false); +} +EXPORT_SYMBOL(sg_pcopy_from_buffer); + +/** + * sg_pcopy_to_buffer - Copy from an SG list to a linear buffer + * @sgl: The SG list + * @nents: Number of SG entries + * @buf: Where to copy to + * @skip: Number of bytes to skip before copying + * @buflen: The number of bytes to copy + * + * Returns the number of copied bytes. + * + **/ +size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents, + void *buf, size_t buflen, off_t skip) +{ + return sg_copy_buffer(sgl, nents, buf, buflen, skip, true); +} +EXPORT_SYMBOL(sg_pcopy_to_buffer); diff --git a/mm/filemap.c b/mm/filemap.c index 7905fe7..4b51ac1 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1539,12 +1539,12 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma, struct address_space *mapping = file->f_mapping; /* If we don't want any read-ahead, don't bother */ - if (VM_RandomReadHint(vma)) + if (vma->vm_flags & VM_RAND_READ) return; if (!ra->ra_pages) return; - if (VM_SequentialReadHint(vma)) { + if (vma->vm_flags & VM_SEQ_READ) { page_cache_sync_readahead(mapping, ra, file, offset, ra->ra_pages); return; @@ -1584,7 +1584,7 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma, struct address_space *mapping = file->f_mapping; /* If we don't want any read-ahead, don't bother */ - if (VM_RandomReadHint(vma)) + if (vma->vm_flags & VM_RAND_READ) return; if (ra->mmap_miss > 0) ra->mmap_miss--; diff --git a/mm/internal.h b/mm/internal.h index 8562de0..4390ac6 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -32,11 +32,6 @@ static inline void set_page_refcounted(struct page *page) set_page_count(page, 1); } -static inline void __put_page(struct page *page) -{ - atomic_dec(&page->_count); -} - static inline void __get_page_tail_foll(struct page *page, bool get_page_head) { diff --git a/mm/memblock.c b/mm/memblock.c index c5fad93..a847bfe6 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -566,7 +566,7 @@ int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) /** * __next_free_mem_range - next function for for_each_free_mem_range() * @idx: pointer to u64 loop variable - * @nid: nid: node selector, %MAX_NUMNODES for all nodes + * @nid: node selector, %MAX_NUMNODES for all nodes * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL * @out_nid: ptr to int for nid of the range, can be %NULL diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2e851f4..d12ca6f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -187,10 +187,6 @@ struct mem_cgroup_per_node { struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES]; }; -struct mem_cgroup_lru_info { - struct mem_cgroup_per_node *nodeinfo[0]; -}; - /* * Cgroups above their limits are maintained in a RB-Tree, independent of * their hierarchy representation @@ -267,28 +263,10 @@ struct mem_cgroup { /* vmpressure notifications */ struct vmpressure vmpressure; - union { - /* - * the counter to account for mem+swap usage. - */ - struct res_counter memsw; - - /* - * rcu_freeing is used only when freeing struct mem_cgroup, - * so put it into a union to avoid wasting more memory. - * It must be disjoint from the css field. It could be - * in a union with the res field, but res plays a much - * larger part in mem_cgroup life than memsw, and might - * be of interest, even at time of free, when debugging. - * So share rcu_head with the less interesting memsw. - */ - struct rcu_head rcu_freeing; - /* - * We also need some space for a worker in deferred freeing. - * By the time we call it, rcu_freeing is no longer in use. - */ - struct work_struct work_freeing; - }; + /* + * the counter to account for mem+swap usage. + */ + struct res_counter memsw; /* * the counter to account for kernel memory usage. @@ -303,8 +281,6 @@ struct mem_cgroup { bool oom_lock; atomic_t under_oom; - atomic_t refcnt; - int swappiness; /* OOM-Killer disable */ int oom_kill_disable; @@ -366,14 +342,8 @@ struct mem_cgroup { atomic_t numainfo_updating; #endif - /* - * Per cgroup active and inactive list, similar to the - * per zone LRU lists. - * - * WARNING: This has to be the last element of the struct. Don't - * add new fields after this point. - */ - struct mem_cgroup_lru_info info; + struct mem_cgroup_per_node *nodeinfo[0]; + /* WARNING: nodeinfo must be the last member here */ }; static size_t memcg_size(void) @@ -416,6 +386,11 @@ static void memcg_kmem_clear_activated(struct mem_cgroup *memcg) static void memcg_kmem_mark_dead(struct mem_cgroup *memcg) { + /* + * Our caller must use css_get() first, because memcg_uncharge_kmem() + * will call css_put() if it sees the memcg is dead. + */ + smp_wmb(); if (test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags)) set_bit(KMEM_ACCOUNTED_DEAD, &memcg->kmem_account_flags); } @@ -508,9 +483,6 @@ enum res_type { */ static DEFINE_MUTEX(memcg_create_mutex); -static void mem_cgroup_get(struct mem_cgroup *memcg); -static void mem_cgroup_put(struct mem_cgroup *memcg); - static inline struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s) { @@ -561,15 +533,15 @@ void sock_update_memcg(struct sock *sk) */ if (sk->sk_cgrp) { BUG_ON(mem_cgroup_is_root(sk->sk_cgrp->memcg)); - mem_cgroup_get(sk->sk_cgrp->memcg); + css_get(&sk->sk_cgrp->memcg->css); return; } rcu_read_lock(); memcg = mem_cgroup_from_task(current); cg_proto = sk->sk_prot->proto_cgroup(memcg); - if (!mem_cgroup_is_root(memcg) && memcg_proto_active(cg_proto)) { - mem_cgroup_get(memcg); + if (!mem_cgroup_is_root(memcg) && + memcg_proto_active(cg_proto) && css_tryget(&memcg->css)) { sk->sk_cgrp = cg_proto; } rcu_read_unlock(); @@ -583,7 +555,7 @@ void sock_release_memcg(struct sock *sk) struct mem_cgroup *memcg; WARN_ON(!sk->sk_cgrp->memcg); memcg = sk->sk_cgrp->memcg; - mem_cgroup_put(memcg); + css_put(&sk->sk_cgrp->memcg->css); } } @@ -683,7 +655,7 @@ static struct mem_cgroup_per_zone * mem_cgroup_zoneinfo(struct mem_cgroup *memcg, int nid, int zid) { VM_BUG_ON((unsigned)nid >= nr_node_ids); - return &memcg->info.nodeinfo[nid]->zoneinfo[zid]; + return &memcg->nodeinfo[nid]->zoneinfo[zid]; } struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg) @@ -3060,8 +3032,16 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size) if (res_counter_uncharge(&memcg->kmem, size)) return; + /* + * Releases a reference taken in kmem_cgroup_css_offline in case + * this last uncharge is racing with the offlining code or it is + * outliving the memcg existence. + * + * The memory barrier imposed by test&clear is paired with the + * explicit one in memcg_kmem_mark_dead(). + */ if (memcg_kmem_test_and_clear_dead(memcg)) - mem_cgroup_put(memcg); + css_put(&memcg->css); } void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep) @@ -3252,7 +3232,7 @@ void memcg_release_cache(struct kmem_cache *s) list_del(&s->memcg_params->list); mutex_unlock(&memcg->slab_caches_mutex); - mem_cgroup_put(memcg); + css_put(&memcg->css); out: kfree(s->memcg_params); } @@ -3412,16 +3392,18 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, mutex_lock(&memcg_cache_mutex); new_cachep = cachep->memcg_params->memcg_caches[idx]; - if (new_cachep) + if (new_cachep) { + css_put(&memcg->css); goto out; + } new_cachep = kmem_cache_dup(memcg, cachep); if (new_cachep == NULL) { new_cachep = cachep; + css_put(&memcg->css); goto out; } - mem_cgroup_get(memcg); atomic_set(&new_cachep->memcg_params->nr_pages , 0); cachep->memcg_params->memcg_caches[idx] = new_cachep; @@ -3509,8 +3491,6 @@ static void memcg_create_cache_work_func(struct work_struct *w) cw = container_of(w, struct create_work, work); memcg_create_kmem_cache(cw->memcg, cw->cachep); - /* Drop the reference gotten when we enqueued. */ - css_put(&cw->memcg->css); kfree(cw); } @@ -3647,6 +3627,34 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order) int ret; *_memcg = NULL; + + /* + * Disabling accounting is only relevant for some specific memcg + * internal allocations. Therefore we would initially not have such + * check here, since direct calls to the page allocator that are marked + * with GFP_KMEMCG only happen outside memcg core. We are mostly + * concerned with cache allocations, and by having this test at + * memcg_kmem_get_cache, we are already able to relay the allocation to + * the root cache and bypass the memcg cache altogether. + * + * There is one exception, though: the SLUB allocator does not create + * large order caches, but rather service large kmallocs directly from + * the page allocator. Therefore, the following sequence when backed by + * the SLUB allocator: + * + * memcg_stop_kmem_account(); + * kmalloc(<large_number>) + * memcg_resume_kmem_account(); + * + * would effectively ignore the fact that we should skip accounting, + * since it will drive us directly to this function without passing + * through the cache selector memcg_kmem_get_cache. Such large + * allocations are extremely rare but can happen, for instance, for the + * cache arrays. We bring this test here. + */ + if (!current->mm || current->memcg_kmem_skip_account) + return true; + memcg = try_get_mem_cgroup_from_mm(current->mm); /* @@ -4200,12 +4208,12 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype, unlock_page_cgroup(pc); /* * even after unlock, we have memcg->res.usage here and this memcg - * will never be freed. + * will never be freed, so it's safe to call css_get(). */ memcg_check_events(memcg, page); if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) { mem_cgroup_swap_statistics(memcg, true); - mem_cgroup_get(memcg); + css_get(&memcg->css); } /* * Migration does not charge the res_counter for the @@ -4317,7 +4325,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) /* * record memcg information, if swapout && memcg != NULL, - * mem_cgroup_get() was called in uncharge(). + * css_get() was called in uncharge(). */ if (do_swap_account && swapout && memcg) swap_cgroup_record(ent, css_id(&memcg->css)); @@ -4348,7 +4356,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) if (!mem_cgroup_is_root(memcg)) res_counter_uncharge(&memcg->memsw, PAGE_SIZE); mem_cgroup_swap_statistics(memcg, false); - mem_cgroup_put(memcg); + css_put(&memcg->css); } rcu_read_unlock(); } @@ -4382,11 +4390,14 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry, * This function is only called from task migration context now. * It postpones res_counter and refcount handling till the end * of task migration(mem_cgroup_clear_mc()) for performance - * improvement. But we cannot postpone mem_cgroup_get(to) - * because if the process that has been moved to @to does - * swap-in, the refcount of @to might be decreased to 0. + * improvement. But we cannot postpone css_get(to) because if + * the process that has been moved to @to does swap-in, the + * refcount of @to might be decreased to 0. + * + * We are in attach() phase, so the cgroup is guaranteed to be + * alive, so we can just call css_get(). */ - mem_cgroup_get(to); + css_get(&to->css); return 0; } return -EINVAL; @@ -5165,14 +5176,6 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val) * starts accounting before all call sites are patched */ memcg_kmem_set_active(memcg); - - /* - * kmem charges can outlive the cgroup. In the case of slab - * pages, for instance, a page contain objects from various - * processes, so it is unfeasible to migrate them away. We - * need to reference count the memcg because of that. - */ - mem_cgroup_get(memcg); } else ret = res_counter_set_limit(&memcg->kmem, val); out: @@ -5205,16 +5208,16 @@ static int memcg_propagate_kmem(struct mem_cgroup *memcg) goto out; /* - * destroy(), called if we fail, will issue static_key_slow_inc() and - * mem_cgroup_put() if kmem is enabled. We have to either call them - * unconditionally, or clear the KMEM_ACTIVE flag. I personally find - * this more consistent, since it always leads to the same destroy path + * __mem_cgroup_free() will issue static_key_slow_dec() because this + * memcg is active already. If the later initialization fails then the + * cgroup core triggers the cleanup so we do not have to do it here. */ - mem_cgroup_get(memcg); static_key_slow_inc(&memcg_kmem_enabled_key); mutex_lock(&set_limit_mutex); + memcg_stop_kmem_account(); ret = memcg_update_cache_sizes(memcg); + memcg_resume_kmem_account(); mutex_unlock(&set_limit_mutex); out: return ret; @@ -5893,23 +5896,43 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) return mem_cgroup_sockets_init(memcg, ss); } -static void kmem_cgroup_destroy(struct mem_cgroup *memcg) +static void memcg_destroy_kmem(struct mem_cgroup *memcg) { mem_cgroup_sockets_destroy(memcg); +} + +static void kmem_cgroup_css_offline(struct mem_cgroup *memcg) +{ + if (!memcg_kmem_is_active(memcg)) + return; + + /* + * kmem charges can outlive the cgroup. In the case of slab + * pages, for instance, a page contain objects from various + * processes. As we prevent from taking a reference for every + * such allocation we have to be careful when doing uncharge + * (see memcg_uncharge_kmem) and here during offlining. + * + * The idea is that that only the _last_ uncharge which sees + * the dead memcg will drop the last reference. An additional + * reference is taken here before the group is marked dead + * which is then paired with css_put during uncharge resp. here. + * + * Although this might sound strange as this path is called from + * css_offline() when the referencemight have dropped down to 0 + * and shouldn't be incremented anymore (css_tryget would fail) + * we do not have other options because of the kmem allocations + * lifetime. + */ + css_get(&memcg->css); memcg_kmem_mark_dead(memcg); if (res_counter_read_u64(&memcg->kmem, RES_USAGE) != 0) return; - /* - * Charges already down to 0, undo mem_cgroup_get() done in the charge - * path here, being careful not to race with memcg_uncharge_kmem: it is - * possible that the charges went down to 0 between mark_dead and the - * res_counter read, so in that case, we don't need the put - */ if (memcg_kmem_test_and_clear_dead(memcg)) - mem_cgroup_put(memcg); + css_put(&memcg->css); } #else static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) @@ -5917,7 +5940,11 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) return 0; } -static void kmem_cgroup_destroy(struct mem_cgroup *memcg) +static void memcg_destroy_kmem(struct mem_cgroup *memcg) +{ +} + +static void kmem_cgroup_css_offline(struct mem_cgroup *memcg) { } #endif @@ -6087,13 +6114,13 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) mz->on_tree = false; mz->memcg = memcg; } - memcg->info.nodeinfo[node] = pn; + memcg->nodeinfo[node] = pn; return 0; } static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) { - kfree(memcg->info.nodeinfo[node]); + kfree(memcg->nodeinfo[node]); } static struct mem_cgroup *mem_cgroup_alloc(void) @@ -6166,49 +6193,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) vfree(memcg); } - -/* - * Helpers for freeing a kmalloc()ed/vzalloc()ed mem_cgroup by RCU, - * but in process context. The work_freeing structure is overlaid - * on the rcu_freeing structure, which itself is overlaid on memsw. - */ -static void free_work(struct work_struct *work) -{ - struct mem_cgroup *memcg; - - memcg = container_of(work, struct mem_cgroup, work_freeing); - __mem_cgroup_free(memcg); -} - -static void free_rcu(struct rcu_head *rcu_head) -{ - struct mem_cgroup *memcg; - - memcg = container_of(rcu_head, struct mem_cgroup, rcu_freeing); - INIT_WORK(&memcg->work_freeing, free_work); - schedule_work(&memcg->work_freeing); -} - -static void mem_cgroup_get(struct mem_cgroup *memcg) -{ - atomic_inc(&memcg->refcnt); -} - -static void __mem_cgroup_put(struct mem_cgroup *memcg, int count) -{ - if (atomic_sub_and_test(count, &memcg->refcnt)) { - struct mem_cgroup *parent = parent_mem_cgroup(memcg); - call_rcu(&memcg->rcu_freeing, free_rcu); - if (parent) - mem_cgroup_put(parent); - } -} - -static void mem_cgroup_put(struct mem_cgroup *memcg) -{ - __mem_cgroup_put(memcg, 1); -} - /* * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled. */ @@ -6268,7 +6252,6 @@ mem_cgroup_css_alloc(struct cgroup *cont) memcg->last_scanned_node = MAX_NUMNODES; INIT_LIST_HEAD(&memcg->oom_notify); - atomic_set(&memcg->refcnt, 1); memcg->move_charge_at_immigrate = 0; mutex_init(&memcg->thresholds_lock); spin_lock_init(&memcg->move_lock); @@ -6304,12 +6287,9 @@ mem_cgroup_css_online(struct cgroup *cont) res_counter_init(&memcg->kmem, &parent->kmem); /* - * We increment refcnt of the parent to ensure that we can - * safely access it on res_counter_charge/uncharge. - * This refcnt will be decremented when freeing this - * mem_cgroup(see mem_cgroup_put). + * No need to take a reference to the parent because cgroup + * core guarantees its existence. */ - mem_cgroup_get(parent); } else { res_counter_init(&memcg->res, NULL); res_counter_init(&memcg->memsw, NULL); @@ -6325,16 +6305,6 @@ mem_cgroup_css_online(struct cgroup *cont) error = memcg_init_kmem(memcg, &mem_cgroup_subsys); mutex_unlock(&memcg_create_mutex); - if (error) { - /* - * We call put now because our (and parent's) refcnts - * are already in place. mem_cgroup_put() will internally - * call __mem_cgroup_free, so return directly - */ - mem_cgroup_put(memcg); - if (parent->use_hierarchy) - mem_cgroup_put(parent); - } return error; } @@ -6360,6 +6330,8 @@ static void mem_cgroup_css_offline(struct cgroup *cont) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); + kmem_cgroup_css_offline(memcg); + mem_cgroup_invalidate_reclaim_iterators(memcg); mem_cgroup_reparent_charges(memcg); mem_cgroup_destroy_all_caches(memcg); @@ -6369,9 +6341,8 @@ static void mem_cgroup_css_free(struct cgroup *cont) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); - kmem_cgroup_destroy(memcg); - - mem_cgroup_put(memcg); + memcg_destroy_kmem(memcg); + __mem_cgroup_free(memcg); } #ifdef CONFIG_MMU @@ -6680,6 +6651,7 @@ static void __mem_cgroup_clear_mc(void) { struct mem_cgroup *from = mc.from; struct mem_cgroup *to = mc.to; + int i; /* we must uncharge all the leftover precharges from mc.to */ if (mc.precharge) { @@ -6700,7 +6672,9 @@ static void __mem_cgroup_clear_mc(void) if (!mem_cgroup_is_root(mc.from)) res_counter_uncharge(&mc.from->memsw, PAGE_SIZE * mc.moved_swap); - __mem_cgroup_put(mc.from, mc.moved_swap); + + for (i = 0; i < mc.moved_swap; i++) + css_put(&mc.from->css); if (!mem_cgroup_is_root(mc.to)) { /* @@ -6710,7 +6684,7 @@ static void __mem_cgroup_clear_mc(void) res_counter_uncharge(&mc.to->res, PAGE_SIZE * mc.moved_swap); } - /* we've already done mem_cgroup_get(mc.to) */ + /* we've already done css_get(mc.to) */ mc.moved_swap = 0; } memcg_oom_recover(from); diff --git a/mm/memory.c b/mm/memory.c index b68812d..1ce2e2a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1150,7 +1150,7 @@ again: if (pte_dirty(ptent)) set_page_dirty(page); if (pte_young(ptent) && - likely(!VM_SequentialReadHint(vma))) + likely(!(vma->vm_flags & VM_SEQ_READ))) mark_page_accessed(page); rss[MM_FILEPAGES]--; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index f5ba127..ca1dd3a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -208,13 +208,13 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat) pfn = pgdat->node_start_pfn; end_pfn = pgdat_end_pfn(pgdat); - /* register_section info */ + /* register section info */ for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) { /* * Some platforms can assign the same pfn to multiple nodes - on * node0 as well as nodeN. To avoid registering a pfn against * multiple nodes we check that this pfn does not already - * reside in some other node. + * reside in some other nodes. */ if (pfn_valid(pfn) && (pfn_to_nid(pfn) == node)) register_page_bootmem_info_section(pfn); @@ -914,19 +914,19 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) && !can_online_high_movable(zone)) { unlock_memory_hotplug(); - return -1; + return -EINVAL; } if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) { if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) { unlock_memory_hotplug(); - return -1; + return -EINVAL; } } if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) { if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) { unlock_memory_hotplug(); - return -1; + return -EINVAL; } } @@ -1358,18 +1358,19 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, if (!(flags & MAP_ANONYMOUS)) { audit_mmap_fd(fd, flags); - if (unlikely(flags & MAP_HUGETLB)) - return -EINVAL; file = fget(fd); if (!file) goto out; if (is_file_hugepages(file)) len = ALIGN(len, huge_page_size(hstate_file(file))); + retval = -EINVAL; + if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file))) + goto out_fput; } else if (flags & MAP_HUGETLB) { struct user_struct *user = NULL; - struct hstate *hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & - SHM_HUGE_MASK); + struct hstate *hs; + hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & SHM_HUGE_MASK); if (!hs) return -EINVAL; @@ -1391,6 +1392,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); +out_fput: if (file) fput(file); out: diff --git a/mm/mremap.c b/mm/mremap.c index 3708655..457d34e 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -456,13 +456,14 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long charged = 0; bool locked = false; - down_write(¤t->mm->mmap_sem); - if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) - goto out; + return ret; + + if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE)) + return ret; if (addr & ~PAGE_MASK) - goto out; + return ret; old_len = PAGE_ALIGN(old_len); new_len = PAGE_ALIGN(new_len); @@ -473,12 +474,13 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, * a zero new-len is nonsensical. */ if (!new_len) - goto out; + return ret; + + down_write(¤t->mm->mmap_sem); if (flags & MREMAP_FIXED) { - if (flags & MREMAP_MAYMOVE) - ret = mremap_to(addr, old_len, new_addr, new_len, - &locked); + ret = mremap_to(addr, old_len, new_addr, new_len, + &locked); goto out; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 327516b..b100255 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -204,6 +204,7 @@ static char * const zone_names[MAX_NR_ZONES] = { }; int min_free_kbytes = 1024; +int user_min_free_kbytes; static unsigned long __meminitdata nr_kernel_pages; static unsigned long __meminitdata nr_all_pages; @@ -1046,7 +1047,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) * MIGRATE_CMA areas. */ if (!is_migrate_cma(migratetype) && - (unlikely(current_order >= pageblock_order / 2) || + (current_order >= pageblock_order / 2 || start_migratetype == MIGRATE_RECLAIMABLE || page_group_by_mobility_disabled)) { int pages; @@ -3153,12 +3154,10 @@ static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref) * Add all populated zones of a node to the zonelist. */ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, - int nr_zones, enum zone_type zone_type) + int nr_zones) { struct zone *zone; - - BUG_ON(zone_type >= MAX_NR_ZONES); - zone_type++; + enum zone_type zone_type = MAX_NR_ZONES; do { zone_type--; @@ -3168,8 +3167,8 @@ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, &zonelist->_zonerefs[nr_zones++]); check_highest_zone(zone_type); } - } while (zone_type); + return nr_zones; } @@ -3363,8 +3362,7 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node) zonelist = &pgdat->node_zonelists[0]; for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++) ; - j = build_zonelists_node(NODE_DATA(node), zonelist, j, - MAX_NR_ZONES - 1); + j = build_zonelists_node(NODE_DATA(node), zonelist, j); zonelist->_zonerefs[j].zone = NULL; zonelist->_zonerefs[j].zone_idx = 0; } @@ -3378,7 +3376,7 @@ static void build_thisnode_zonelists(pg_data_t *pgdat) struct zonelist *zonelist; zonelist = &pgdat->node_zonelists[1]; - j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1); + j = build_zonelists_node(pgdat, zonelist, 0); zonelist->_zonerefs[j].zone = NULL; zonelist->_zonerefs[j].zone_idx = 0; } @@ -3586,7 +3584,7 @@ static void build_zonelists(pg_data_t *pgdat) local_node = pgdat->node_id; zonelist = &pgdat->node_zonelists[0]; - j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1); + j = build_zonelists_node(pgdat, zonelist, 0); /* * Now we build the zonelist so that it contains the zones @@ -3599,14 +3597,12 @@ static void build_zonelists(pg_data_t *pgdat) for (node = local_node + 1; node < MAX_NUMNODES; node++) { if (!node_online(node)) continue; - j = build_zonelists_node(NODE_DATA(node), zonelist, j, - MAX_NR_ZONES - 1); + j = build_zonelists_node(NODE_DATA(node), zonelist, j); } for (node = 0; node < local_node; node++) { if (!node_online(node)) continue; - j = build_zonelists_node(NODE_DATA(node), zonelist, j, - MAX_NR_ZONES - 1); + j = build_zonelists_node(NODE_DATA(node), zonelist, j); } zonelist->_zonerefs[j].zone = NULL; @@ -4421,13 +4417,13 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid, */ static unsigned long __meminit zone_spanned_pages_in_node(int nid, unsigned long zone_type, + unsigned long node_start_pfn, + unsigned long node_end_pfn, unsigned long *ignored) { - unsigned long node_start_pfn, node_end_pfn; unsigned long zone_start_pfn, zone_end_pfn; - /* Get the start and end of the node and zone */ - get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); + /* Get the start and end of the zone */ zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type]; zone_end_pfn = arch_zone_highest_possible_pfn[zone_type]; adjust_zone_range_for_zone_movable(nid, zone_type, @@ -4482,14 +4478,14 @@ unsigned long __init absent_pages_in_range(unsigned long start_pfn, /* Return the number of page frames in holes in a zone on a node */ static unsigned long __meminit zone_absent_pages_in_node(int nid, unsigned long zone_type, + unsigned long node_start_pfn, + unsigned long node_end_pfn, unsigned long *ignored) { unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type]; unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type]; - unsigned long node_start_pfn, node_end_pfn; unsigned long zone_start_pfn, zone_end_pfn; - get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high); zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high); @@ -4502,6 +4498,8 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid, #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, unsigned long zone_type, + unsigned long node_start_pfn, + unsigned long node_end_pfn, unsigned long *zones_size) { return zones_size[zone_type]; @@ -4509,6 +4507,8 @@ static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, static inline unsigned long __meminit zone_absent_pages_in_node(int nid, unsigned long zone_type, + unsigned long node_start_pfn, + unsigned long node_end_pfn, unsigned long *zholes_size) { if (!zholes_size) @@ -4520,21 +4520,27 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid, #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, - unsigned long *zones_size, unsigned long *zholes_size) + unsigned long node_start_pfn, + unsigned long node_end_pfn, + unsigned long *zones_size, + unsigned long *zholes_size) { unsigned long realtotalpages, totalpages = 0; enum zone_type i; for (i = 0; i < MAX_NR_ZONES; i++) totalpages += zone_spanned_pages_in_node(pgdat->node_id, i, - zones_size); + node_start_pfn, + node_end_pfn, + zones_size); pgdat->node_spanned_pages = totalpages; realtotalpages = totalpages; for (i = 0; i < MAX_NR_ZONES; i++) realtotalpages -= zone_absent_pages_in_node(pgdat->node_id, i, - zholes_size); + node_start_pfn, node_end_pfn, + zholes_size); pgdat->node_present_pages = realtotalpages; printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages); @@ -4643,6 +4649,7 @@ static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages, * NOTE: pgdat should get zeroed by caller. */ static void __paginginit free_area_init_core(struct pglist_data *pgdat, + unsigned long node_start_pfn, unsigned long node_end_pfn, unsigned long *zones_size, unsigned long *zholes_size) { enum zone_type j; @@ -4664,8 +4671,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, struct zone *zone = pgdat->node_zones + j; unsigned long size, realsize, freesize, memmap_pages; - size = zone_spanned_pages_in_node(nid, j, zones_size); + size = zone_spanned_pages_in_node(nid, j, node_start_pfn, + node_end_pfn, zones_size); realsize = freesize = size - zone_absent_pages_in_node(nid, j, + node_start_pfn, + node_end_pfn, zholes_size); /* @@ -4779,6 +4789,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, unsigned long node_start_pfn, unsigned long *zholes_size) { pg_data_t *pgdat = NODE_DATA(nid); + unsigned long start_pfn = 0; + unsigned long end_pfn = 0; /* pg_data_t should be reset to zero when it's allocated */ WARN_ON(pgdat->nr_zones || pgdat->classzone_idx); @@ -4786,7 +4798,11 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, pgdat->node_id = nid; pgdat->node_start_pfn = node_start_pfn; init_zone_allows_reclaim(nid); - calculate_node_totalpages(pgdat, zones_size, zholes_size); +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP + get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); +#endif + calculate_node_totalpages(pgdat, start_pfn, end_pfn, + zones_size, zholes_size); alloc_node_mem_map(pgdat); #ifdef CONFIG_FLAT_NODE_MEM_MAP @@ -4795,7 +4811,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, (unsigned long)pgdat->node_mem_map); #endif - free_area_init_core(pgdat, zones_size, zholes_size); + free_area_init_core(pgdat, start_pfn, end_pfn, + zones_size, zholes_size); } #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP @@ -5573,14 +5590,21 @@ static void __meminit setup_per_zone_inactive_ratio(void) int __meminit init_per_zone_wmark_min(void) { unsigned long lowmem_kbytes; + int new_min_free_kbytes; lowmem_kbytes = nr_free_buffer_pages() * (PAGE_SIZE >> 10); - - min_free_kbytes = int_sqrt(lowmem_kbytes * 16); - if (min_free_kbytes < 128) - min_free_kbytes = 128; - if (min_free_kbytes > 65536) - min_free_kbytes = 65536; + new_min_free_kbytes = int_sqrt(lowmem_kbytes * 16); + + if (new_min_free_kbytes > user_min_free_kbytes) { + min_free_kbytes = new_min_free_kbytes; + if (min_free_kbytes < 128) + min_free_kbytes = 128; + if (min_free_kbytes > 65536) + min_free_kbytes = 65536; + } else { + pr_warn("min_free_kbytes is not updated to %d because user defined value %d is preferred\n", + new_min_free_kbytes, user_min_free_kbytes); + } setup_per_zone_wmarks(); refresh_zone_stat_thresholds(); setup_per_zone_lowmem_reserve(); @@ -5598,8 +5622,10 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { proc_dointvec(table, write, buffer, length, ppos); - if (write) + if (write) { + user_min_free_kbytes = min_free_kbytes; setup_per_zone_wmarks(); + } return 0; } @@ -720,7 +720,7 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, * mapping is already gone, the unmap path will have * set PG_referenced or activated the page. */ - if (likely(!VM_SequentialReadHint(vma))) + if (likely(!(vma->vm_flags & VM_SEQ_READ))) referenced++; } pte_unmap_unlock(pte, ptl); diff --git a/mm/sparse.c b/mm/sparse.c index b38400f..308d5033 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -753,6 +753,7 @@ out: return ret; } +#ifdef CONFIG_MEMORY_HOTREMOVE #ifdef CONFIG_MEMORY_FAILURE static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) { @@ -774,7 +775,6 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) } #endif -#ifdef CONFIG_MEMORY_HOTREMOVE static void free_section_usemap(struct page *memmap, unsigned long *usemap) { struct page *usemap_page; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 91a1047..13a5495 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -388,12 +388,12 @@ nocache: addr = ALIGN(first->va_end, align); if (addr < vstart) goto nocache; - if (addr + size - 1 < addr) + if (addr + size < addr) goto overflow; } else { addr = ALIGN(vstart, align); - if (addr + size - 1 < addr) + if (addr + size < addr) goto overflow; n = vmap_area_root.rb_node; @@ -420,7 +420,7 @@ nocache: if (addr + cached_hole_size < first->va_start) cached_hole_size = first->va_start - addr; addr = ALIGN(first->va_end, align); - if (addr + size - 1 < addr) + if (addr + size < addr) goto overflow; if (list_is_last(&first->list, &vmap_area_list)) @@ -754,7 +754,6 @@ struct vmap_block { struct vmap_area *va; struct vmap_block_queue *vbq; unsigned long free, dirty; - DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS); DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); struct list_head free_list; struct rcu_head rcu_head; @@ -820,7 +819,6 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask) vb->va = va; vb->free = VMAP_BBMAP_BITS; vb->dirty = 0; - bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS); bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS); INIT_LIST_HEAD(&vb->free_list); @@ -873,7 +871,6 @@ static void purge_fragmented_blocks(int cpu) if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) { vb->free = 0; /* prevent further allocs after releasing lock */ vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */ - bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS); bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS); spin_lock(&vbq->lock); list_del_rcu(&vb->free_list); @@ -891,11 +888,6 @@ static void purge_fragmented_blocks(int cpu) } } -static void purge_fragmented_blocks_thiscpu(void) -{ - purge_fragmented_blocks(smp_processor_id()); -} - static void purge_fragmented_blocks_allcpus(void) { int cpu; @@ -910,7 +902,6 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) struct vmap_block *vb; unsigned long addr = 0; unsigned int order; - int purge = 0; BUG_ON(size & ~PAGE_MASK); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); @@ -934,17 +925,7 @@ again: if (vb->free < 1UL << order) goto next; - i = bitmap_find_free_region(vb->alloc_map, - VMAP_BBMAP_BITS, order); - - if (i < 0) { - if (vb->free + vb->dirty == VMAP_BBMAP_BITS) { - /* fragmented and no outstanding allocations */ - BUG_ON(vb->dirty != VMAP_BBMAP_BITS); - purge = 1; - } - goto next; - } + i = VMAP_BBMAP_BITS - vb->free; addr = vb->va->va_start + (i << PAGE_SHIFT); BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(vb->va->va_start)); @@ -960,9 +941,6 @@ next: spin_unlock(&vb->lock); } - if (purge) - purge_fragmented_blocks_thiscpu(); - put_cpu_var(vmap_block_queue); rcu_read_unlock(); @@ -1311,15 +1289,15 @@ static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, spin_unlock(&vmap_area_lock); } -static void clear_vm_unlist(struct vm_struct *vm) +static void clear_vm_uninitialized_flag(struct vm_struct *vm) { /* - * Before removing VM_UNLIST, + * Before removing VM_UNINITIALIZED, * we should make sure that vm has proper values. * Pair with smp_rmb() in show_numa_info(). */ smp_wmb(); - vm->flags &= ~VM_UNLIST; + vm->flags &= ~VM_UNINITIALIZED; } static struct vm_struct *__get_vm_area_node(unsigned long size, @@ -1453,7 +1431,7 @@ static void __vunmap(const void *addr, int deallocate_pages) return; if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n", - addr)); + addr)) return; area = remove_vm_area(addr); @@ -1499,7 +1477,6 @@ static void __vunmap(const void *addr, int deallocate_pages) * conventions for vfree() arch-depenedent would be a really bad idea) * * NOTE: assumes that the object at *addr has a size >= sizeof(llist_node) - * */ void vfree(const void *addr) { @@ -1511,8 +1488,8 @@ void vfree(const void *addr) return; if (unlikely(in_interrupt())) { struct vfree_deferred *p = &__get_cpu_var(vfree_deferred); - llist_add((struct llist_node *)addr, &p->list); - schedule_work(&p->wq); + if (llist_add((struct llist_node *)addr, &p->list)) + schedule_work(&p->wq); } else __vunmap(addr, 1); } @@ -1657,21 +1634,21 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, if (!size || (size >> PAGE_SHIFT) > totalram_pages) goto fail; - area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNLIST, + area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED, start, end, node, gfp_mask, caller); if (!area) goto fail; addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller); if (!addr) - return NULL; + goto fail; /* - * In this function, newly allocated vm_struct has VM_UNLIST flag. - * It means that vm_struct is not fully initialized. + * In this function, newly allocated vm_struct has VM_UNINITIALIZED + * flag. It means that vm_struct is not fully initialized. * Now, it is fully initialized, so remove this flag here. */ - clear_vm_unlist(area); + clear_vm_uninitialized_flag(area); /* * A ref_count = 3 is needed because the vm_struct and vmap_area @@ -2591,11 +2568,6 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v) if (!counters) return; - /* Pair with smp_wmb() in clear_vm_unlist() */ - smp_rmb(); - if (v->flags & VM_UNLIST) - return; - memset(counters, 0, nr_node_ids * sizeof(unsigned int)); for (nr = 0; nr < v->nr_pages; nr++) @@ -2624,6 +2596,11 @@ static int s_show(struct seq_file *m, void *p) v = va->vm; + /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */ + smp_rmb(); + if (v->flags & VM_UNINITIALIZED) + return 0; + seq_printf(m, "0x%pK-0x%pK %7ld", v->addr, v->addr + v->size, v->size); diff --git a/mm/vmscan.c b/mm/vmscan.c index 99b3ac7..2cff0d4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1443,25 +1443,11 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * as there is no guarantee the dirtying process is throttled in the * same way balance_dirty_pages() manages. * - * This scales the number of dirty pages that must be under writeback - * before a zone gets flagged ZONE_WRITEBACK. It is a simple backoff - * function that has the most effect in the range DEF_PRIORITY to - * DEF_PRIORITY-2 which is the priority reclaim is considered to be - * in trouble and reclaim is considered to be in trouble. - * - * DEF_PRIORITY 100% isolated pages must be PageWriteback to throttle - * DEF_PRIORITY-1 50% must be PageWriteback - * DEF_PRIORITY-2 25% must be PageWriteback, kswapd in trouble - * ... - * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any - * isolated page is PageWriteback - * * Once a zone is flagged ZONE_WRITEBACK, kswapd will count the number * of pages under pages flagged for immediate reclaim and stall if any * are encountered in the nr_immediate check below. */ - if (nr_writeback && nr_writeback >= - (nr_taken >> (DEF_PRIORITY - sc->priority))) + if (nr_writeback && nr_writeback == nr_taken) zone_set_flag(zone, ZONE_WRITEBACK); /* @@ -2361,8 +2347,10 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, aborted_reclaim = shrink_zones(zonelist, sc); /* - * Don't shrink slabs when reclaiming memory from - * over limit cgroups + * Don't shrink slabs when reclaiming memory from over limit + * cgroups but do shrink slab at least once when aborting + * reclaim for compaction to avoid unevenly scanning file/anon + * LRU pages over slab pages. */ if (global_reclaim(sc)) { unsigned long lru_pages = 0; @@ -2404,7 +2392,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, WB_REASON_TRY_TO_FREE_PAGES); sc->may_writepage = 1; } - } while (--sc->priority >= 0); + } while (--sc->priority >= 0 && !aborted_reclaim); out: delayacct_freepages_end(); diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index f97869f..6031e23 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -311,6 +311,11 @@ cmd_lzo = (cat $(filter-out FORCE,$^) | \ lzop -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ (rm -f $@ ; false) +quiet_cmd_lz4 = LZ4 $@ +cmd_lz4 = (cat $(filter-out FORCE,$^) | \ + lz4c -l -c1 stdin stdout && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ + (rm -f $@ ; false) + # U-Boot mkimage # --------------------------------------------------------------------------- diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 6afcd12..2ee9eb7 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -6,6 +6,7 @@ # Licensed under the terms of the GNU GPL License version 2 use strict; +use POSIX; my $P = $0; $P =~ s@.*/@@g; @@ -399,37 +400,52 @@ sub seed_camelcase_includes { return if ($camelcase_seeded); my $files; - my $camelcase_git_file = ""; + my $camelcase_cache = ""; + my @include_files = (); + + $camelcase_seeded = 1; if (-d ".git") { my $git_last_include_commit = `git log --no-merges --pretty=format:"%h%n" -1 -- include`; chomp $git_last_include_commit; - $camelcase_git_file = ".checkpatch-camelcase.$git_last_include_commit"; - if (-f $camelcase_git_file) { - open(my $camelcase_file, '<', "$camelcase_git_file") - or warn "$P: Can't read '$camelcase_git_file' $!\n"; - while (<$camelcase_file>) { - chomp; - $camelcase{$_} = 1; - } - close($camelcase_file); - - return; - } - $files = `git ls-files include`; + $camelcase_cache = ".checkpatch-camelcase.git.$git_last_include_commit"; } else { + my $last_mod_date = 0; $files = `find $root/include -name "*.h"`; + @include_files = split('\n', $files); + foreach my $file (@include_files) { + my $date = POSIX::strftime("%Y%m%d%H%M", + localtime((stat $file)[9])); + $last_mod_date = $date if ($last_mod_date < $date); + } + $camelcase_cache = ".checkpatch-camelcase.date.$last_mod_date"; + } + + if ($camelcase_cache ne "" && -f $camelcase_cache) { + open(my $camelcase_file, '<', "$camelcase_cache") + or warn "$P: Can't read '$camelcase_cache' $!\n"; + while (<$camelcase_file>) { + chomp; + $camelcase{$_} = 1; + } + close($camelcase_file); + + return; + } + + if (-d ".git") { + $files = `git ls-files "include/*.h"`; + @include_files = split('\n', $files); } - my @include_files = split('\n', $files); + foreach my $file (@include_files) { seed_camelcase_file($file); } - $camelcase_seeded = 1; - if ($camelcase_git_file ne "") { + if ($camelcase_cache ne "") { unlink glob ".checkpatch-camelcase.*"; - open(my $camelcase_file, '>', "$camelcase_git_file") - or warn "$P: Can't write '$camelcase_git_file' $!\n"; + open(my $camelcase_file, '>', "$camelcase_cache") + or warn "$P: Can't write '$camelcase_cache' $!\n"; foreach (sort { lc($a) cmp lc($b) } keys(%camelcase)) { print $camelcase_file ("$_\n"); } diff --git a/usr/Kconfig b/usr/Kconfig index 085872b..642f503 100644 --- a/usr/Kconfig +++ b/usr/Kconfig @@ -90,6 +90,15 @@ config RD_LZO Support loading of a LZO encoded initial ramdisk or cpio buffer If unsure, say N. +config RD_LZ4 + bool "Support initial ramdisks compressed using LZ4" if EXPERT + default !EXPERT + depends on BLK_DEV_INITRD + select DECOMPRESS_LZ4 + help + Support loading of a LZ4 encoded initial ramdisk or cpio buffer + If unsure, say N. + choice prompt "Built-in initramfs compression mode" if INITRAMFS_SOURCE!="" help |