Diffstat (limited to 'arch')
190 files changed, 3430 insertions, 796 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 245058b..290f02ee 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -6,6 +6,7 @@ config ARM select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION @@ -312,7 +313,7 @@ config ARCH_MULTIPLATFORM config ARCH_INTEGRATOR bool "ARM Ltd. Integrator family" select ARM_AMBA - select ARM_PATCH_PHYS_VIRT + select ARM_PATCH_PHYS_VIRT if MMU select AUTO_ZRELADDR select COMMON_CLK select COMMON_CLK_VERSATILE @@ -658,7 +659,7 @@ config ARCH_MSM config ARCH_SHMOBILE_LEGACY bool "Renesas ARM SoCs (non-multiplatform)" select ARCH_SHMOBILE - select ARM_PATCH_PHYS_VIRT + select ARM_PATCH_PHYS_VIRT if MMU select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select HAVE_ARM_SCU if SMP diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts index ecb2677..e2156a5 100644 --- a/arch/arm/boot/dts/am335x-evm.dts +++ b/arch/arm/boot/dts/am335x-evm.dts @@ -529,8 +529,8 @@ serial-dir = < /* 0: INACTIVE, 1: TX, 2: RX */ 0 0 1 2 >; - tx-num-evt = <1>; - rx-num-evt = <1>; + tx-num-evt = <32>; + rx-num-evt = <32>; }; &tps { diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index ab9a34c..80a3b21 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -560,8 +560,8 @@ serial-dir = < /* 0: INACTIVE, 1: TX, 2: RX */ 0 0 1 2 >; - tx-num-evt = <1>; - rx-num-evt = <1>; + tx-num-evt = <32>; + rx-num-evt = <32>; }; &tscadc { diff --git a/arch/arm/boot/dts/am335x-igep0033.dtsi b/arch/arm/boot/dts/am335x-igep0033.dtsi index 8a0a72d..a1a0cc5 100644 --- a/arch/arm/boot/dts/am335x-igep0033.dtsi +++ b/arch/arm/boot/dts/am335x-igep0033.dtsi @@ -105,10 +105,16 @@ &cpsw_emac0 { phy_id = <&davinci_mdio>, <0>; + phy-mode = "rmii"; }; &cpsw_emac1 { phy_id = <&davinci_mdio>, <1>; + phy-mode = "rmii"; +}; + +&phy_sel { + rmii-clock-ext; }; &elm { diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index 19f1f7e..90098f9 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -319,6 +319,10 @@ phy-mode = "rmii"; }; +&phy_sel { + rmii-clock-ext; +}; + &i2c0 { status = "okay"; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi index 2877959..b84bac5 100644 --- a/arch/arm/boot/dts/at91sam9n12.dtsi +++ b/arch/arm/boot/dts/at91sam9n12.dtsi @@ -925,7 +925,7 @@ compatible = "atmel,at91rm9200-ohci", "usb-ohci"; reg = <0x00500000 0x00100000>; interrupts = <22 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&usb>, <&uhphs_clk>, <&udphs_clk>, + clocks = <&usb>, <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; clock-names = "usb_clk", "ohci_clk", "hclk", "uhpck"; status = "disabled"; diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index d6133f4..2c0d6ea 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -1045,6 +1045,8 @@ reg = <0x00500000 0x80000 0xf803c000 0x400>; interrupts = <23 IRQ_TYPE_LEVEL_HIGH 0>; + clocks = <&usb>, <&udphs_clk>; + clock-names = "hclk", "pclk"; status = "disabled"; ep0 { @@ -1122,6 +1124,7 @@ compatible = "atmel,at91sam9rl-pwm"; reg = <0xf8034000 0x300>; interrupts = <18 IRQ_TYPE_LEVEL_HIGH 4>; + clocks = <&pwm_clk>; #pwm-cells = <3>; status = "disabled"; }; @@ -1153,8 +1156,7 @@ compatible = 
"atmel,at91rm9200-ohci", "usb-ohci"; reg = <0x00600000 0x100000>; interrupts = <22 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&usb>, <&uhphs_clk>, <&udphs_clk>, - <&uhpck>; + clocks = <&usb>, <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; clock-names = "usb_clk", "ohci_clk", "hclk", "uhpck"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts index 4adc280..8308954 100644 --- a/arch/arm/boot/dts/dra7-evm.dts +++ b/arch/arm/boot/dts/dra7-evm.dts @@ -240,6 +240,7 @@ regulator-name = "ldo3"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + regulator-always-on; regulator-boot-on; }; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index c29945e..8012763 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -773,7 +773,6 @@ clocks = <&qspi_gfclk_div>; clock-names = "fck"; num-cs = <4>; - interrupts = <0 343 0x4>; status = "disabled"; }; @@ -984,6 +983,17 @@ #size-cells = <1>; status = "disabled"; }; + + atl: atl@4843c000 { + compatible = "ti,dra7-atl"; + reg = <0x4843c000 0x3ff>; + ti,hwmods = "atl"; + ti,provided-clocks = <&atl_clkin0_ck>, <&atl_clkin1_ck>, + <&atl_clkin2_ck>, <&atl_clkin3_ck>; + clocks = <&atl_gfclk_mux>; + clock-names = "fck"; + status = "disabled"; + }; }; }; diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi index b03cfe4..dc7a292 100644 --- a/arch/arm/boot/dts/dra7xx-clocks.dtsi +++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi @@ -10,26 +10,26 @@ &cm_core_aon_clocks { atl_clkin0_ck: atl_clkin0_ck { #clock-cells = <0>; - compatible = "fixed-clock"; - clock-frequency = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; }; atl_clkin1_ck: atl_clkin1_ck { #clock-cells = <0>; - compatible = "fixed-clock"; - clock-frequency = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; }; atl_clkin2_ck: atl_clkin2_ck { #clock-cells = <0>; - compatible = "fixed-clock"; - clock-frequency = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; }; atl_clkin3_ck: atl_clkin3_ck { #clock-cells = <0>; - compatible = "fixed-clock"; - clock-frequency = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; }; hdmi_clkin_ck: hdmi_clkin_ck { @@ -673,10 +673,12 @@ l3_iclk_div: l3_iclk_div { #clock-cells = <0>; - compatible = "fixed-factor-clock"; + compatible = "ti,divider-clock"; + ti,max-div = <2>; + ti,bit-shift = <4>; + reg = <0x0100>; clocks = <&dpll_core_h12x2_ck>; - clock-mult = <1>; - clock-div = <1>; + ti,index-power-of-two; }; l4_root_clk_div: l4_root_clk_div { @@ -684,7 +686,7 @@ compatible = "fixed-factor-clock"; clocks = <&l3_iclk_div>; clock-mult = <1>; - clock-div = <1>; + clock-div = <2>; }; video1_clk2_div: video1_clk2_div { diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi index fbaf426..17b22e9 100644 --- a/arch/arm/boot/dts/exynos4.dtsi +++ b/arch/arm/boot/dts/exynos4.dtsi @@ -554,7 +554,7 @@ interrupts = <0 37 0>, <0 38 0>, <0 39 0>, <0 40 0>, <0 41 0>; clocks = <&clock CLK_PWM>; clock-names = "timers"; - #pwm-cells = <2>; + #pwm-cells = <3>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi index 481beec..a40a5c2 100644 --- a/arch/arm/boot/dts/exynos5420.dtsi +++ b/arch/arm/boot/dts/exynos5420.dtsi @@ -167,7 +167,7 @@ compatible = "samsung,exynos5420-audss-clock"; reg = <0x03810000 0x0C>; #clock-cells = <1>; - clocks = <&clock CLK_FIN_PLL>, <&clock CLK_FOUT_EPLL>, + clocks = <&clock 
CLK_FIN_PLL>, <&clock CLK_MAU_EPLL>, <&clock CLK_SCLK_MAUDIO0>, <&clock CLK_SCLK_MAUPCM0>; clock-names = "pll_ref", "pll_in", "sclk_audio", "sclk_pcm_in"; }; @@ -260,6 +260,9 @@ mfc_pd: power-domain@10044060 { compatible = "samsung,exynos4210-pd"; reg = <0x10044060 0x20>; + clocks = <&clock CLK_FIN_PLL>, <&clock CLK_MOUT_SW_ACLK333>, + <&clock CLK_MOUT_USER_ACLK333>; + clock-names = "oscclk", "pclk0", "clk0"; }; disp_pd: power-domain@100440C0 { diff --git a/arch/arm/boot/dts/hi3620.dtsi b/arch/arm/boot/dts/hi3620.dtsi index ab1116d..83a5b86 100644 --- a/arch/arm/boot/dts/hi3620.dtsi +++ b/arch/arm/boot/dts/hi3620.dtsi @@ -73,7 +73,7 @@ L2: l2-cache { compatible = "arm,pl310-cache"; - reg = <0xfc10000 0x100000>; + reg = <0x100000 0x100000>; interrupts = <0 15 4>; cache-unified; cache-level = <2>; diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts index cf0be66..1becefc 100644 --- a/arch/arm/boot/dts/omap3-beagle-xm.dts +++ b/arch/arm/boot/dts/omap3-beagle-xm.dts @@ -251,6 +251,11 @@ codec { }; }; + + twl_power: power { + compatible = "ti,twl4030-power-beagleboard-xm", "ti,twl4030-power-idle-osc-off"; + ti,use_poweroff; + }; }; }; @@ -301,6 +306,7 @@ }; &uart3 { + interrupts-extended = <&intc 74 &omap3_pmx_core OMAP3_UART3_RX>; pinctrl-names = "default"; pinctrl-0 = <&uart3_pins>; }; diff --git a/arch/arm/boot/dts/omap3-evm-common.dtsi b/arch/arm/boot/dts/omap3-evm-common.dtsi index 8ae8f00..c8747c7 100644 --- a/arch/arm/boot/dts/omap3-evm-common.dtsi +++ b/arch/arm/boot/dts/omap3-evm-common.dtsi @@ -50,6 +50,13 @@ gpios = <&twl_gpio 18 GPIO_ACTIVE_LOW>; }; +&twl { + twl_power: power { + compatible = "ti,twl4030-power-omap3-evm", "ti,twl4030-power-idle"; + ti,use_poweroff; + }; +}; + &i2c2 { clock-frequency = <400000>; }; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index ae8ae3f..b15f1a7 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -351,6 +351,11 @@ compatible = "ti,twl4030-audio"; ti,enable-vibra = <1>; }; + + twl_power: power { + compatible = "ti,twl4030-power-n900"; + ti,use_poweroff; + }; }; &twl_keypad { diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 3bfda16..a4ed549 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -45,7 +45,6 @@ operating-points = < /* kHz uV */ - 500000 880000 1000000 1060000 1500000 1250000 >; diff --git a/arch/arm/boot/dts/r8a7791.dtsi b/arch/arm/boot/dts/r8a7791.dtsi index 8d7ffae..79f68ac 100644 --- a/arch/arm/boot/dts/r8a7791.dtsi +++ b/arch/arm/boot/dts/r8a7791.dtsi @@ -540,9 +540,9 @@ #clock-cells = <0>; clock-output-names = "sd1"; }; - sd2_clk: sd3_clk@e615007c { + sd2_clk: sd3_clk@e615026c { compatible = "renesas,r8a7791-div6-clock", "renesas,cpg-div6-clock"; - reg = <0 0xe615007c 0 4>; + reg = <0 0xe615026c 0 4>; clocks = <&pll1_div2_clk>; #clock-cells = <0>; clock-output-names = "sd2"; diff --git a/arch/arm/boot/dts/ste-nomadik-s8815.dts b/arch/arm/boot/dts/ste-nomadik-s8815.dts index f557feb..90d8b6c 100644 --- a/arch/arm/boot/dts/ste-nomadik-s8815.dts +++ b/arch/arm/boot/dts/ste-nomadik-s8815.dts @@ -4,7 +4,7 @@ */ /dts-v1/; -/include/ "ste-nomadik-stn8815.dtsi" +#include "ste-nomadik-stn8815.dtsi" / { model = "Calao Systems USB-S8815"; diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index d316c95..dbcf521 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -1,7 +1,9 
@@ /* * Device Tree for the ST-Ericsson Nomadik 8815 STn8815 SoC */ -/include/ "skeleton.dtsi" + +#include <dt-bindings/gpio/gpio.h> +#include "skeleton.dtsi" / { #address-cells = <1>; @@ -842,8 +844,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; - cd-gpios = <&gpio3 15 0x1>; - cd-inverted; + cd-gpios = <&gpio3 15 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; pinctrl-0 = <&mmcsd_default_mux>, <&mmcsd_default_mode>; vmmc-supply = <&vmmc_regulator>; diff --git a/arch/arm/configs/bcm_defconfig b/arch/arm/configs/bcm_defconfig index 9d13dae..4bf7226 100644 --- a/arch/arm/configs/bcm_defconfig +++ b/arch/arm/configs/bcm_defconfig @@ -94,10 +94,10 @@ CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_BACKLIGHT_PWM=y # CONFIG_USB_SUPPORT is not set CONFIG_MMC=y -CONFIG_MMC_UNSAFE_RESUME=y CONFIG_MMC_BLOCK_MINORS=32 CONFIG_MMC_TEST=y CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_BCM_KONA=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index be1a345..5348364 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -223,12 +223,12 @@ CONFIG_POWER_RESET_GPIO=y CONFIG_POWER_RESET_SUN6I=y CONFIG_SENSORS_LM90=y CONFIG_THERMAL=y -CONFIG_DOVE_THERMAL=y CONFIG_ARMADA_THERMAL=y CONFIG_WATCHDOG=y CONFIG_ORION_WATCHDOG=y CONFIG_SUNXI_WATCHDOG=y CONFIG_MFD_AS3722=y +CONFIG_MFD_BCM590XX=y CONFIG_MFD_CROS_EC=y CONFIG_MFD_CROS_EC_SPI=y CONFIG_MFD_MAX8907=y @@ -240,6 +240,7 @@ CONFIG_MFD_TPS65910=y CONFIG_REGULATOR_VIRTUAL_CONSUMER=y CONFIG_REGULATOR_AB8500=y CONFIG_REGULATOR_AS3722=y +CONFIG_REGULATOR_BCM590XX=y CONFIG_REGULATOR_GPIO=y CONFIG_REGULATOR_MAX8907=y CONFIG_REGULATOR_PALMAS=y diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c index 4522366..15468fb 100644 --- a/arch/arm/crypto/aesbs-glue.c +++ b/arch/arm/crypto/aesbs-glue.c @@ -137,7 +137,7 @@ static int aesbs_cbc_encrypt(struct blkcipher_desc *desc, dst += AES_BLOCK_SIZE; } while (--blocks); } - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } @@ -158,7 +158,7 @@ static int aesbs_cbc_decrypt(struct blkcipher_desc *desc, bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, walk.nbytes, &ctx->dec, walk.iv); kernel_neon_end(); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } while (walk.nbytes) { u32 blocks = walk.nbytes / AES_BLOCK_SIZE; @@ -182,7 +182,7 @@ static int aesbs_cbc_decrypt(struct blkcipher_desc *desc, dst += AES_BLOCK_SIZE; src += AES_BLOCK_SIZE; } while (--blocks); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } @@ -268,7 +268,7 @@ static int aesbs_xts_encrypt(struct blkcipher_desc *desc, bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, walk.nbytes, &ctx->enc, walk.iv); kernel_neon_end(); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } @@ -292,7 +292,7 @@ static int aesbs_xts_decrypt(struct blkcipher_desc *desc, bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, walk.nbytes, &ctx->dec, walk.iv); kernel_neon_end(); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 060a75e..0406cb3 
100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -50,6 +50,7 @@ struct machine_desc { struct smp_operations *smp; /* SMP operations */ bool (*smp_init)(void); void (*fixup)(struct tag *, char **); + void (*dt_fixup)(void); void (*init_meminfo)(void); void (*reserve)(void);/* reserve mem blocks */ void (*map_io)(void);/* IO mapping function */ diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index e94a157..11c54de 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -212,7 +212,7 @@ const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) mdesc_best = &__mach_desc_GENERIC_DT; #endif - if (!dt_phys || !early_init_dt_scan(phys_to_virt(dt_phys))) + if (!dt_phys || !early_init_dt_verify(phys_to_virt(dt_phys))) return NULL; mdesc = of_flat_dt_match_machine(mdesc_best, arch_get_next_mach); @@ -237,6 +237,12 @@ const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) dump_machine_table(); /* does not return */ } + /* We really don't want to do this, but sometimes firmware provides buggy data */ + if (mdesc->dt_fixup) + mdesc->dt_fixup(); + + early_init_dt_scan_nodes(); + /* Change machine number to match the mdesc we're using */ __machine_arch_type = mdesc->nr; diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S index a5599cf..2b32978 100644 --- a/arch/arm/kernel/iwmmxt.S +++ b/arch/arm/kernel/iwmmxt.S @@ -94,13 +94,19 @@ ENTRY(iwmmxt_task_enable) mrc p15, 0, r2, c2, c0, 0 mov r2, r2 @ cpwait + bl concan_save - teq r1, #0 @ test for last ownership - mov lr, r9 @ normal exit from exception - beq concan_load @ no owner, skip save +#ifdef CONFIG_PREEMPT_COUNT + get_thread_info r10 +#endif +4: dec_preempt_count r10, r3 + mov pc, r9 @ normal exit from exception concan_save: + teq r1, #0 @ test for last ownership + beq concan_load @ no owner, skip save + tmrc r2, wCon @ CUP? wCx @@ -138,7 +144,7 @@ concan_dump: wstrd wR15, [r1, #MMX_WR15] 2: teq r0, #0 @ anything to load? 
- beq 3f + moveq pc, lr @ if not, return concan_load: @@ -171,14 +177,9 @@ concan_load: @ clear CUP/MUP (only if r1 != 0) teq r1, #0 mov r2, #0 - beq 3f - tmcr wCon, r2 + moveq pc, lr -3: -#ifdef CONFIG_PREEMPT_COUNT - get_thread_info r10 -#endif -4: dec_preempt_count r10, r3 + tmcr wCon, r2 mov pc, lr /* diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 778c2f7..a74b53c 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -160,12 +160,16 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr) static struct undef_hook kgdb_brkpt_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_BREAKINST, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, .fn = kgdb_brk_fn }; static struct undef_hook kgdb_compiled_brkpt_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_COMPILED_BREAK, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, .fn = kgdb_compiled_brk_fn }; diff --git a/arch/arm/kernel/kprobes-test-arm.c b/arch/arm/kernel/kprobes-test-arm.c index 9db4b65..cb14242 100644 --- a/arch/arm/kernel/kprobes-test-arm.c +++ b/arch/arm/kernel/kprobes-test-arm.c @@ -74,8 +74,6 @@ void kprobe_arm_test_cases(void) TEST_RRR( op "lt" s " r11, r",11,VAL1,", r",14,N(val),", asr r",7, 6,"")\ TEST_RR( op "gt" s " r12, r13" ", r",14,val, ", ror r",14,7,"")\ TEST_RR( op "le" s " r14, r",0, val, ", r13" ", lsl r",14,8,"")\ - TEST_RR( op s " r12, pc" ", r",14,val, ", ror r",14,7,"")\ - TEST_RR( op s " r14, r",0, val, ", pc" ", lsl r",14,8,"")\ TEST_R( op "eq" s " r0, r",11,VAL1,", #0xf5") \ TEST_R( op "ne" s " r11, r",0, VAL1,", #0xf5000000") \ TEST_R( op s " r7, r",8, VAL2,", #0x000af000") \ @@ -103,8 +101,6 @@ void kprobe_arm_test_cases(void) TEST_RRR( op "ge r",11,VAL1,", r",14,N(val),", asr r",7, 6,"") \ TEST_RR( op "le r13" ", r",14,val, ", ror r",14,7,"") \ TEST_RR( op "gt r",0, val, ", r13" ", lsl r",14,8,"") \ - TEST_RR( op " pc" ", r",14,val, ", ror r",14,7,"") \ - TEST_RR( op " r",0, val, ", pc" ", lsl r",14,8,"") \ TEST_R( op "eq r",11,VAL1,", #0xf5") \ TEST_R( op "ne r",0, VAL1,", #0xf5000000") \ TEST_R( op " r",8, VAL2,", #0x000af000") @@ -125,7 +121,6 @@ void kprobe_arm_test_cases(void) TEST_RR( op "ge" s " r11, r",11,N(val),", asr r",7, 6,"") \ TEST_RR( op "lt" s " r12, r",11,val, ", ror r",14,7,"") \ TEST_R( op "gt" s " r14, r13" ", lsl r",14,8,"") \ - TEST_R( op "le" s " r14, pc" ", lsl r",14,8,"") \ TEST( op "eq" s " r0, #0xf5") \ TEST( op "ne" s " r11, #0xf5000000") \ TEST( op s " r7, #0x000af000") \ @@ -159,12 +154,19 @@ void kprobe_arm_test_cases(void) TEST_SUPPORTED("cmp pc, #0x1000"); TEST_SUPPORTED("cmp sp, #0x1000"); - /* Data-processing with PC as shift*/ + /* Data-processing with PC and a shift count in a register */ TEST_UNSUPPORTED(__inst_arm(0xe15c0f1e) " @ cmp r12, r14, asl pc") TEST_UNSUPPORTED(__inst_arm(0xe1a0cf1e) " @ mov r12, r14, asl pc") TEST_UNSUPPORTED(__inst_arm(0xe08caf1e) " @ add r10, r12, r14, asl pc") - - /* Data-processing with PC as shift*/ + TEST_UNSUPPORTED(__inst_arm(0xe151021f) " @ cmp r1, pc, lsl r2") + TEST_UNSUPPORTED(__inst_arm(0xe17f0211) " @ cmn pc, r1, lsl r2") + TEST_UNSUPPORTED(__inst_arm(0xe1a0121f) " @ mov r1, pc, lsl r2") + TEST_UNSUPPORTED(__inst_arm(0xe1a0f211) " @ mov pc, r1, lsl r2") + TEST_UNSUPPORTED(__inst_arm(0xe042131f) " @ sub r1, r2, pc, lsl r3") + TEST_UNSUPPORTED(__inst_arm(0xe1cf1312) " @ bic r1, pc, r2, lsl r3") + TEST_UNSUPPORTED(__inst_arm(0xe081f312) " @ add pc, r1, r2, lsl r3") + + /* Data-processing with PC as a target and status registers updated */ TEST_UNSUPPORTED("movs pc, r1") 
TEST_UNSUPPORTED("movs pc, r1, lsl r2") TEST_UNSUPPORTED("movs pc, #0x10000") @@ -187,14 +189,14 @@ void kprobe_arm_test_cases(void) TEST_BF_R ("add pc, pc, r",14,2f-1f-8,"") TEST_BF_R ("add pc, r",14,2f-1f-8,", pc") TEST_BF_R ("mov pc, r",0,2f,"") - TEST_BF_RR("mov pc, r",0,2f,", asl r",1,0,"") + TEST_BF_R ("add pc, pc, r",14,(2f-1f-8)*2,", asr #1") TEST_BB( "sub pc, pc, #1b-2b+8") #if __LINUX_ARM_ARCH__ == 6 && !defined(CONFIG_CPU_V7) TEST_BB( "sub pc, pc, #1b-2b+8-2") /* UNPREDICTABLE before and after ARMv6 */ #endif TEST_BB_R( "sub pc, pc, r",14, 1f-2f+8,"") TEST_BB_R( "rsb pc, r",14,1f-2f+8,", pc") - TEST_RR( "add pc, pc, r",10,-2,", asl r",11,1,"") + TEST_R( "add pc, pc, r",10,-2,", asl #1") #ifdef CONFIG_THUMB2_KERNEL TEST_ARM_TO_THUMB_INTERWORK_R("add pc, pc, r",0,3f-1f-8+1,"") TEST_ARM_TO_THUMB_INTERWORK_R("sub pc, r",0,3f+8+1,", #8") @@ -216,6 +218,7 @@ void kprobe_arm_test_cases(void) TEST_BB_R("bx r",7,2f,"") TEST_BF_R("bxeq r",14,2f,"") +#if __LINUX_ARM_ARCH__ >= 5 TEST_R("clz r0, r",0, 0x0,"") TEST_R("clzeq r7, r",14,0x1,"") TEST_R("clz lr, r",7, 0xffffffff,"") @@ -337,6 +340,7 @@ void kprobe_arm_test_cases(void) TEST_UNSUPPORTED(__inst_arm(0xe16f02e1) " @ smultt pc, r1, r2") TEST_UNSUPPORTED(__inst_arm(0xe16002ef) " @ smultt r0, pc, r2") TEST_UNSUPPORTED(__inst_arm(0xe1600fe1) " @ smultt r0, r1, pc") +#endif TEST_GROUP("Multiply and multiply-accumulate") @@ -559,6 +563,7 @@ void kprobe_arm_test_cases(void) TEST_UNSUPPORTED("ldrsht r1, [r2], #48") #endif +#if __LINUX_ARM_ARCH__ >= 5 TEST_RPR( "strd r",0, VAL1,", [r",1, 48,", -r",2,24,"]") TEST_RPR( "strccd r",8, VAL2,", [r",13,0, ", r",12,48,"]") TEST_RPR( "strd r",4, VAL1,", [r",2, 24,", r",3, 48,"]!") @@ -595,6 +600,7 @@ void kprobe_arm_test_cases(void) TEST_UNSUPPORTED(__inst_arm(0xe1efc3d0) " @ ldrd r12, [pc, #48]!") TEST_UNSUPPORTED(__inst_arm(0xe0c9f3d0) " @ ldrd pc, [r9], #48") TEST_UNSUPPORTED(__inst_arm(0xe0c9e3d0) " @ ldrd lr, [r9], #48") +#endif TEST_GROUP("Miscellaneous") @@ -1227,7 +1233,9 @@ void kprobe_arm_test_cases(void) TEST_COPROCESSOR( "mrc"two" 0, 0, r0, cr0, cr0, 0") COPROCESSOR_INSTRUCTIONS_ST_LD("",e) +#if __LINUX_ARM_ARCH__ >= 5 COPROCESSOR_INSTRUCTIONS_MC_MR("",e) +#endif TEST_UNSUPPORTED("svc 0") TEST_UNSUPPORTED("svc 0xffffff") @@ -1287,7 +1295,9 @@ void kprobe_arm_test_cases(void) TEST( "blx __dummy_thumb_subroutine_odd") #endif /* __LINUX_ARM_ARCH__ >= 6 */ +#if __LINUX_ARM_ARCH__ >= 5 COPROCESSOR_INSTRUCTIONS_ST_LD("2",f) +#endif #if __LINUX_ARM_ARCH__ >= 6 COPROCESSOR_INSTRUCTIONS_MC_MR("2",f) #endif diff --git a/arch/arm/kernel/kprobes-test.c b/arch/arm/kernel/kprobes-test.c index 3796399..08d7312 100644 --- a/arch/arm/kernel/kprobes-test.c +++ b/arch/arm/kernel/kprobes-test.c @@ -225,6 +225,7 @@ static int pre_handler_called; static int post_handler_called; static int jprobe_func_called; static int kretprobe_handler_called; +static int tests_failed; #define FUNC_ARG1 0x12345678 #define FUNC_ARG2 0xabcdef @@ -461,6 +462,13 @@ static int run_api_tests(long (*func)(long, long)) pr_info(" jprobe\n"); ret = test_jprobe(func); +#if defined(CONFIG_THUMB2_KERNEL) && !defined(MODULE) + if (ret == -EINVAL) { + pr_err("FAIL: Known longtime bug with jprobe on Thumb kernels\n"); + tests_failed = ret; + ret = 0; + } +#endif if (ret < 0) return ret; @@ -1672,6 +1680,8 @@ static int __init run_all_tests(void) out: if (ret == 0) + ret = tests_failed; + if (ret == 0) pr_info("Finished kprobe tests OK\n"); else pr_err("kprobe tests failed\n"); diff --git a/arch/arm/kernel/probes-arm.c 
b/arch/arm/kernel/probes-arm.c index 51a13a0..8eaef81 100644 --- a/arch/arm/kernel/probes-arm.c +++ b/arch/arm/kernel/probes-arm.c @@ -341,12 +341,12 @@ static const union decode_item arm_cccc_000x_table[] = { /* CMP (reg-shift reg) cccc 0001 0101 xxxx xxxx xxxx 0xx1 xxxx */ /* CMN (reg-shift reg) cccc 0001 0111 xxxx xxxx xxxx 0xx1 xxxx */ DECODE_EMULATEX (0x0f900090, 0x01100010, PROBES_DATA_PROCESSING_REG, - REGS(ANY, 0, NOPC, 0, ANY)), + REGS(NOPC, 0, NOPC, 0, NOPC)), /* MOV (reg-shift reg) cccc 0001 101x xxxx xxxx xxxx 0xx1 xxxx */ /* MVN (reg-shift reg) cccc 0001 111x xxxx xxxx xxxx 0xx1 xxxx */ DECODE_EMULATEX (0x0fa00090, 0x01a00010, PROBES_DATA_PROCESSING_REG, - REGS(0, ANY, NOPC, 0, ANY)), + REGS(0, NOPC, NOPC, 0, NOPC)), /* AND (reg-shift reg) cccc 0000 000x xxxx xxxx xxxx 0xx1 xxxx */ /* EOR (reg-shift reg) cccc 0000 001x xxxx xxxx xxxx 0xx1 xxxx */ @@ -359,7 +359,7 @@ static const union decode_item arm_cccc_000x_table[] = { /* ORR (reg-shift reg) cccc 0001 100x xxxx xxxx xxxx 0xx1 xxxx */ /* BIC (reg-shift reg) cccc 0001 110x xxxx xxxx xxxx 0xx1 xxxx */ DECODE_EMULATEX (0x0e000090, 0x00000010, PROBES_DATA_PROCESSING_REG, - REGS(ANY, ANY, NOPC, 0, ANY)), + REGS(NOPC, NOPC, NOPC, 0, NOPC)), DECODE_END }; diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 9d85318..e35d880 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -275,7 +275,7 @@ void store_cpu_topology(unsigned int cpuid) cpu_topology[cpuid].socket_id, mpidr); } -static inline const int cpu_corepower_flags(void) +static inline int cpu_corepower_flags(void) { return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN; } diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index f38cf7c1..66c9b96 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -173,10 +173,8 @@ static struct platform_device exynos_cpuidle = { void __init exynos_cpuidle_init(void) { - if (soc_is_exynos5440()) - return; - - platform_device_register(&exynos_cpuidle); + if (soc_is_exynos4210() || soc_is_exynos5250()) + platform_device_register(&exynos_cpuidle); } void __init exynos_cpufreq_init(void) @@ -297,7 +295,7 @@ static void __init exynos_dt_machine_init(void) * This is called from smp_prepare_cpus if we've built for SMP, but * we still need to set it up for PM and firmware ops if not. 
*/ - if (!IS_ENABLED(SMP)) + if (!IS_ENABLED(CONFIG_SMP)) exynos_sysram_init(); exynos_cpuidle_init(); @@ -337,6 +335,15 @@ static void __init exynos_reserve(void) #endif } +static void __init exynos_dt_fixup(void) +{ + /* + * Some versions of uboot pass garbage entries in the memory node, + * use the old CONFIG_ARM_NR_BANKS + */ + of_fdt_limit_memory(8); +} + DT_MACHINE_START(EXYNOS_DT, "SAMSUNG EXYNOS (Flattened Device Tree)") /* Maintainer: Thomas Abraham <thomas.abraham@linaro.org> */ /* Maintainer: Kukjin Kim <kgene.kim@samsung.com> */ @@ -350,4 +357,5 @@ DT_MACHINE_START(EXYNOS_DT, "SAMSUNG EXYNOS (Flattened Device Tree)") .dt_compat = exynos_dt_compat, .restart = exynos_restart, .reserve = exynos_reserve, + .dt_fixup = exynos_dt_fixup, MACHINE_END diff --git a/arch/arm/mach-exynos/firmware.c b/arch/arm/mach-exynos/firmware.c index eb91d23..e8797bb 100644 --- a/arch/arm/mach-exynos/firmware.c +++ b/arch/arm/mach-exynos/firmware.c @@ -57,8 +57,13 @@ static int exynos_set_cpu_boot_addr(int cpu, unsigned long boot_addr) boot_reg = sysram_ns_base_addr + 0x1c; - if (!soc_is_exynos4212() && !soc_is_exynos3250()) - boot_reg += 4*cpu; + /* + * Almost all Exynos-series of SoCs that run in secure mode don't need + * additional offset for every CPU, with Exynos4412 being the only + * exception. + */ + if (soc_is_exynos4412()) + boot_reg += 4 * cpu; __raw_writel(boot_addr, boot_reg); return 0; diff --git a/arch/arm/mach-exynos/hotplug.c b/arch/arm/mach-exynos/hotplug.c index 8a134d0..920a4ba 100644 --- a/arch/arm/mach-exynos/hotplug.c +++ b/arch/arm/mach-exynos/hotplug.c @@ -40,15 +40,17 @@ static inline void cpu_leave_lowpower(void) static inline void platform_do_lowpower(unsigned int cpu, int *spurious) { + u32 mpidr = cpu_logical_map(cpu); + u32 core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + for (;;) { - /* make cpu1 to be turned off at next WFI command */ - if (cpu == 1) - exynos_cpu_power_down(cpu); + /* Turn the CPU off on next WFI instruction. */ + exynos_cpu_power_down(core_id); wfi(); - if (pen_release == cpu_logical_map(cpu)) { + if (pen_release == core_id) { /* * OK, proper wakeup, we're done */ diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 1c8d31e..50b9aad 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -90,7 +90,8 @@ static void exynos_secondary_init(unsigned int cpu) static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) { unsigned long timeout; - unsigned long phys_cpu = cpu_logical_map(cpu); + u32 mpidr = cpu_logical_map(cpu); + u32 core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); int ret = -ENOSYS; /* @@ -104,17 +105,18 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * the holding pen - release it, then wait for it to flag * that it has been released by resetting pen_release. * - * Note that "pen_release" is the hardware CPU ID, whereas + * Note that "pen_release" is the hardware CPU core ID, whereas * "cpu" is Linux's internal ID. 
*/ - write_pen_release(phys_cpu); + write_pen_release(core_id); - if (!exynos_cpu_power_state(cpu)) { - exynos_cpu_power_up(cpu); + if (!exynos_cpu_power_state(core_id)) { + exynos_cpu_power_up(core_id); timeout = 10; /* wait max 10 ms until cpu1 is on */ - while (exynos_cpu_power_state(cpu) != S5P_CORE_LOCAL_PWR_EN) { + while (exynos_cpu_power_state(core_id) + != S5P_CORE_LOCAL_PWR_EN) { if (timeout-- == 0) break; @@ -145,20 +147,20 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * Try to set boot address using firmware first * and fall back to boot register if it fails. */ - ret = call_firmware_op(set_cpu_boot_addr, phys_cpu, boot_addr); + ret = call_firmware_op(set_cpu_boot_addr, core_id, boot_addr); if (ret && ret != -ENOSYS) goto fail; if (ret == -ENOSYS) { - void __iomem *boot_reg = cpu_boot_reg(phys_cpu); + void __iomem *boot_reg = cpu_boot_reg(core_id); if (IS_ERR(boot_reg)) { ret = PTR_ERR(boot_reg); goto fail; } - __raw_writel(boot_addr, cpu_boot_reg(phys_cpu)); + __raw_writel(boot_addr, cpu_boot_reg(core_id)); } - call_firmware_op(cpu_boot, phys_cpu); + call_firmware_op(cpu_boot, core_id); arch_send_wakeup_ipi_mask(cpumask_of(cpu)); @@ -227,22 +229,24 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus) * boot register if it fails. */ for (i = 1; i < max_cpus; ++i) { - unsigned long phys_cpu; unsigned long boot_addr; + u32 mpidr; + u32 core_id; int ret; - phys_cpu = cpu_logical_map(i); + mpidr = cpu_logical_map(i); + core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); boot_addr = virt_to_phys(exynos4_secondary_startup); - ret = call_firmware_op(set_cpu_boot_addr, phys_cpu, boot_addr); + ret = call_firmware_op(set_cpu_boot_addr, core_id, boot_addr); if (ret && ret != -ENOSYS) break; if (ret == -ENOSYS) { - void __iomem *boot_reg = cpu_boot_reg(phys_cpu); + void __iomem *boot_reg = cpu_boot_reg(core_id); if (IS_ERR(boot_reg)) break; - __raw_writel(boot_addr, cpu_boot_reg(phys_cpu)); + __raw_writel(boot_addr, cpu_boot_reg(core_id)); } } } diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c index fe6570e..797cb13 100644 --- a/arch/arm/mach-exynos/pm_domains.c +++ b/arch/arm/mach-exynos/pm_domains.c @@ -17,6 +17,7 @@ #include <linux/err.h> #include <linux/slab.h> #include <linux/pm_domain.h> +#include <linux/clk.h> #include <linux/delay.h> #include <linux/of_address.h> #include <linux/of_platform.h> @@ -24,6 +25,8 @@ #include "regs-pmu.h" +#define MAX_CLK_PER_DOMAIN 4 + /* * Exynos specific wrapper around the generic power domain */ @@ -32,6 +35,9 @@ struct exynos_pm_domain { char const *name; bool is_off; struct generic_pm_domain pd; + struct clk *oscclk; + struct clk *clk[MAX_CLK_PER_DOMAIN]; + struct clk *pclk[MAX_CLK_PER_DOMAIN]; }; static int exynos_pd_power(struct generic_pm_domain *domain, bool power_on) @@ -44,6 +50,19 @@ static int exynos_pd_power(struct generic_pm_domain *domain, bool power_on) pd = container_of(domain, struct exynos_pm_domain, pd); base = pd->base; + /* Set oscclk before powering off a domain*/ + if (!power_on) { + int i; + + for (i = 0; i < MAX_CLK_PER_DOMAIN; i++) { + if (IS_ERR(pd->clk[i])) + break; + if (clk_set_parent(pd->clk[i], pd->oscclk)) + pr_err("%s: error setting oscclk as parent to clock %d\n", + pd->name, i); + } + } + pwr = power_on ? 
S5P_INT_LOCAL_PWR_EN : 0; __raw_writel(pwr, base); @@ -60,6 +79,20 @@ static int exynos_pd_power(struct generic_pm_domain *domain, bool power_on) cpu_relax(); usleep_range(80, 100); } + + /* Restore clocks after powering on a domain*/ + if (power_on) { + int i; + + for (i = 0; i < MAX_CLK_PER_DOMAIN; i++) { + if (IS_ERR(pd->clk[i])) + break; + if (clk_set_parent(pd->clk[i], pd->pclk[i])) + pr_err("%s: error setting parent to clock%d\n", + pd->name, i); + } + } + return 0; } @@ -152,9 +185,11 @@ static __init int exynos4_pm_init_power_domain(void) for_each_compatible_node(np, NULL, "samsung,exynos4210-pd") { struct exynos_pm_domain *pd; - int on; + int on, i; + struct device *dev; pdev = of_find_device_by_node(np); + dev = &pdev->dev; pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) { @@ -170,6 +205,30 @@ static __init int exynos4_pm_init_power_domain(void) pd->pd.power_on = exynos_pd_power_on; pd->pd.of_node = np; + pd->oscclk = clk_get(dev, "oscclk"); + if (IS_ERR(pd->oscclk)) + goto no_clk; + + for (i = 0; i < MAX_CLK_PER_DOMAIN; i++) { + char clk_name[8]; + + snprintf(clk_name, sizeof(clk_name), "clk%d", i); + pd->clk[i] = clk_get(dev, clk_name); + if (IS_ERR(pd->clk[i])) + break; + snprintf(clk_name, sizeof(clk_name), "pclk%d", i); + pd->pclk[i] = clk_get(dev, clk_name); + if (IS_ERR(pd->pclk[i])) { + clk_put(pd->clk[i]); + pd->clk[i] = ERR_PTR(-EINVAL); + break; + } + } + + if (IS_ERR(pd->clk[0])) + clk_put(pd->oscclk); + +no_clk: platform_set_drvdata(pdev, pd); on = __raw_readl(pd->base + 0x4) & S5P_INT_LOCAL_PWR_EN; diff --git a/arch/arm/mach-imx/clk-gate2.c b/arch/arm/mach-imx/clk-gate2.c index 4ba587d..84acdfd 100644 --- a/arch/arm/mach-imx/clk-gate2.c +++ b/arch/arm/mach-imx/clk-gate2.c @@ -67,8 +67,12 @@ static void clk_gate2_disable(struct clk_hw *hw) spin_lock_irqsave(gate->lock, flags); - if (gate->share_count && --(*gate->share_count) > 0) - goto out; + if (gate->share_count) { + if (WARN_ON(*gate->share_count == 0)) + goto out; + else if (--(*gate->share_count) > 0) + goto out; + } reg = readl(gate->reg); reg &= ~(3 << gate->bit_idx); @@ -78,19 +82,26 @@ out: spin_unlock_irqrestore(gate->lock, flags); } -static int clk_gate2_is_enabled(struct clk_hw *hw) +static int clk_gate2_reg_is_enabled(void __iomem *reg, u8 bit_idx) { - u32 reg; - struct clk_gate2 *gate = to_clk_gate2(hw); + u32 val = readl(reg); - reg = readl(gate->reg); - - if (((reg >> gate->bit_idx) & 1) == 1) + if (((val >> bit_idx) & 1) == 1) return 1; return 0; } +static int clk_gate2_is_enabled(struct clk_hw *hw) +{ + struct clk_gate2 *gate = to_clk_gate2(hw); + + if (gate->share_count) + return !!(*gate->share_count); + else + return clk_gate2_reg_is_enabled(gate->reg, gate->bit_idx); +} + static struct clk_ops clk_gate2_ops = { .enable = clk_gate2_enable, .disable = clk_gate2_disable, @@ -116,6 +127,10 @@ struct clk *clk_register_gate2(struct device *dev, const char *name, gate->bit_idx = bit_idx; gate->flags = clk_gate2_flags; gate->lock = lock; + + /* Initialize share_count per hardware state */ + if (share_count) + *share_count = clk_gate2_reg_is_enabled(reg, bit_idx) ? 
1 : 0; gate->share_count = share_count; init.name = name; diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 8e795de..8556c78 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -70,7 +70,7 @@ static const char *cko_sels[] = { "cko1", "cko2", }; static const char *lvds_sels[] = { "dummy", "dummy", "dummy", "dummy", "dummy", "dummy", "pll4_audio", "pll5_video", "pll8_mlb", "enet_ref", - "pcie_ref", "sata_ref", + "pcie_ref_125m", "sata_ref_100m", }; enum mx6q_clks { @@ -491,7 +491,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) /* All existing boards with PCIe use LVDS1 */ if (IS_ENABLED(CONFIG_PCI_IMX6)) - clk_set_parent(clk[lvds1_sel], clk[sata_ref]); + clk_set_parent(clk[lvds1_sel], clk[sata_ref_100m]); /* Set initial power mode */ imx6q_set_lpm(WAIT_CLOCKED); diff --git a/arch/arm/mach-mvebu/Makefile b/arch/arm/mach-mvebu/Makefile index 2ecb828..1636cdb 100644 --- a/arch/arm/mach-mvebu/Makefile +++ b/arch/arm/mach-mvebu/Makefile @@ -7,7 +7,7 @@ CFLAGS_pmsu.o := -march=armv7-a obj-y += system-controller.o mvebu-soc-id.o ifeq ($(CONFIG_MACH_MVEBU_V7),y) -obj-y += cpu-reset.o board-v7.o coherency.o coherency_ll.o pmsu.o +obj-y += cpu-reset.o board-v7.o coherency.o coherency_ll.o pmsu.o pmsu_ll.o obj-$(CONFIG_SMP) += platsmp.o headsmp.o platsmp-a9.o headsmp-a9.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o endif diff --git a/arch/arm/mach-mvebu/board-v7.c b/arch/arm/mach-mvebu/board-v7.c index 8bb742f..b2524d6 100644 --- a/arch/arm/mach-mvebu/board-v7.c +++ b/arch/arm/mach-mvebu/board-v7.c @@ -23,6 +23,7 @@ #include <linux/mbus.h> #include <linux/signal.h> #include <linux/slab.h> +#include <linux/irqchip.h> #include <asm/hardware/cache-l2x0.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -71,17 +72,23 @@ static int armada_375_external_abort_wa(unsigned long addr, unsigned int fsr, return 1; } -static void __init mvebu_timer_and_clk_init(void) +static void __init mvebu_init_irq(void) { - of_clk_init(NULL); - clocksource_of_init(); + irqchip_init(); mvebu_scu_enable(); coherency_init(); BUG_ON(mvebu_mbus_dt_init(coherency_available())); +} + +static void __init external_abort_quirk(void) +{ + u32 dev, rev; - if (of_machine_is_compatible("marvell,armada375")) - hook_fault_code(16 + 6, armada_375_external_abort_wa, SIGBUS, 0, - "imprecise external abort"); + if (mvebu_get_soc_id(&dev, &rev) == 0 && rev > ARMADA_375_Z1_REV) + return; + + hook_fault_code(16 + 6, armada_375_external_abort_wa, SIGBUS, 0, + "imprecise external abort"); } static void __init i2c_quirk(void) @@ -169,8 +176,10 @@ static void __init mvebu_dt_init(void) { if (of_machine_is_compatible("plathome,openblocks-ax3-4")) i2c_quirk(); - if (of_machine_is_compatible("marvell,a375-db")) + if (of_machine_is_compatible("marvell,a375-db")) { + external_abort_quirk(); thermal_quirk(); + } of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); } @@ -185,7 +194,7 @@ DT_MACHINE_START(ARMADA_370_XP_DT, "Marvell Armada 370/XP (Device Tree)") .l2c_aux_mask = ~0, .smp = smp_ops(armada_xp_smp_ops), .init_machine = mvebu_dt_init, - .init_time = mvebu_timer_and_clk_init, + .init_irq = mvebu_init_irq, .restart = mvebu_restart, .dt_compat = armada_370_xp_dt_compat, MACHINE_END @@ -198,7 +207,7 @@ static const char * const armada_375_dt_compat[] = { DT_MACHINE_START(ARMADA_375_DT, "Marvell Armada 375 (Device Tree)") .l2c_aux_val = 0, .l2c_aux_mask = ~0, - .init_time = mvebu_timer_and_clk_init, + .init_irq = mvebu_init_irq, .init_machine = 
mvebu_dt_init, .restart = mvebu_restart, .dt_compat = armada_375_dt_compat, @@ -213,7 +222,7 @@ static const char * const armada_38x_dt_compat[] = { DT_MACHINE_START(ARMADA_38X_DT, "Marvell Armada 380/385 (Device Tree)") .l2c_aux_val = 0, .l2c_aux_mask = ~0, - .init_time = mvebu_timer_and_clk_init, + .init_irq = mvebu_init_irq, .restart = mvebu_restart, .dt_compat = armada_38x_dt_compat, MACHINE_END diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 477202f..2bdc323 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -292,6 +292,10 @@ static struct notifier_block mvebu_hwcc_nb = { .notifier_call = mvebu_hwcc_notifier, }; +static struct notifier_block mvebu_hwcc_pci_nb = { + .notifier_call = mvebu_hwcc_notifier, +}; + static void __init armada_370_coherency_init(struct device_node *np) { struct resource res; @@ -427,7 +431,7 @@ static int __init coherency_pci_init(void) { if (coherency_available()) bus_register_notifier(&pci_bus_type, - &mvebu_hwcc_nb); + &mvebu_hwcc_pci_nb); return 0; } diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S index 5925366..da5bb29 100644 --- a/arch/arm/mach-mvebu/headsmp-a9.S +++ b/arch/arm/mach-mvebu/headsmp-a9.S @@ -15,6 +15,8 @@ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> + __CPUINIT #define CPU_RESUME_ADDR_REG 0xf10182d4 @@ -22,13 +24,18 @@ .global armada_375_smp_cpu1_enable_code_end armada_375_smp_cpu1_enable_code_start: - ldr r0, [pc, #4] +ARM_BE8(setend be) + adr r0, 1f + ldr r0, [r0] ldr r1, [r0] +ARM_BE8(rev r1, r1) mov pc, r1 +1: .word CPU_RESUME_ADDR_REG armada_375_smp_cpu1_enable_code_end: ENTRY(mvebu_cortex_a9_secondary_startup) +ARM_BE8(setend be) bl v7_invalidate_l1 b secondary_startup ENDPROC(mvebu_cortex_a9_secondary_startup) diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c index 53a55c8..25aa823 100644 --- a/arch/arm/mach-mvebu/pmsu.c +++ b/arch/arm/mach-mvebu/pmsu.c @@ -66,6 +66,8 @@ static void __iomem *pmsu_mp_base; extern void ll_disable_coherency(void); extern void ll_enable_coherency(void); +extern void armada_370_xp_cpu_resume(void); + static struct platform_device armada_xp_cpuidle_device = { .name = "cpuidle-armada-370-xp", }; @@ -140,13 +142,6 @@ static void armada_370_xp_pmsu_enable_l2_powerdown_onidle(void) writel(reg, pmsu_mp_base + L2C_NFABRIC_PM_CTL); } -static void armada_370_xp_cpu_resume(void) -{ - asm volatile("bl ll_add_cpu_to_smp_group\n\t" - "bl ll_enable_coherency\n\t" - "b cpu_resume\n\t"); -} - /* No locking is needed because we only access per-CPU registers */ void armada_370_xp_pmsu_idle_prepare(bool deepidle) { @@ -206,12 +201,12 @@ static noinline int do_armada_370_xp_cpu_suspend(unsigned long deepidle) /* Test the CR_C bit and set it if it was cleared */ asm volatile( - "mrc p15, 0, %0, c1, c0, 0 \n\t" - "tst %0, #(1 << 2) \n\t" - "orreq %0, %0, #(1 << 2) \n\t" - "mcreq p15, 0, %0, c1, c0, 0 \n\t" + "mrc p15, 0, r0, c1, c0, 0 \n\t" + "tst r0, #(1 << 2) \n\t" + "orreq r0, r0, #(1 << 2) \n\t" + "mcreq p15, 0, r0, c1, c0, 0 \n\t" "isb " - : : "r" (0)); + : : : "r0"); pr_warn("Failed to suspend the system\n"); diff --git a/arch/arm/mach-mvebu/pmsu_ll.S b/arch/arm/mach-mvebu/pmsu_ll.S new file mode 100644 index 0000000..fc3de68 --- /dev/null +++ b/arch/arm/mach-mvebu/pmsu_ll.S @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2014 Marvell + * + * Thomas Petazzoni <thomas.petazzoni@free-electrons.com> + * Gregory Clement <gregory.clement@free-electrons.com> + * + * This file is 
licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * This is the entry point through which CPUs exiting cpuidle deep + * idle state are going. + */ +ENTRY(armada_370_xp_cpu_resume) +ARM_BE8(setend be ) @ go BE8 if entered LE + bl ll_add_cpu_to_smp_group + bl ll_enable_coherency + b cpu_resume +ENDPROC(armada_370_xp_cpu_resume) + diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 8421f38..8ca99e9 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -110,14 +110,16 @@ obj-y += prm_common.o cm_common.o obj-$(CONFIG_ARCH_OMAP2) += prm2xxx_3xxx.o prm2xxx.o cm2xxx.o obj-$(CONFIG_ARCH_OMAP3) += prm2xxx_3xxx.o prm3xxx.o cm3xxx.o obj-$(CONFIG_ARCH_OMAP3) += vc3xxx_data.o vp3xxx_data.o -obj-$(CONFIG_SOC_AM33XX) += prm33xx.o cm33xx.o omap-prcm-4-5-common = cminst44xx.o cm44xx.o prm44xx.o \ prcm_mpu44xx.o prminst44xx.o \ vc44xx_data.o vp44xx_data.o obj-$(CONFIG_ARCH_OMAP4) += $(omap-prcm-4-5-common) obj-$(CONFIG_SOC_OMAP5) += $(omap-prcm-4-5-common) obj-$(CONFIG_SOC_DRA7XX) += $(omap-prcm-4-5-common) -obj-$(CONFIG_SOC_AM43XX) += $(omap-prcm-4-5-common) +am33xx-43xx-prcm-common += prm33xx.o cm33xx.o +obj-$(CONFIG_SOC_AM33XX) += $(am33xx-43xx-prcm-common) +obj-$(CONFIG_SOC_AM43XX) += $(omap-prcm-4-5-common) \ + $(am33xx-43xx-prcm-common) # OMAP voltage domains voltagedomain-common := voltage.o vc.o vp.o diff --git a/arch/arm/mach-omap2/clkt_dpll.c b/arch/arm/mach-omap2/clkt_dpll.c index 332af92..67fd26a 100644 --- a/arch/arm/mach-omap2/clkt_dpll.c +++ b/arch/arm/mach-omap2/clkt_dpll.c @@ -76,7 +76,7 @@ * (assuming that it is counting N upwards), or -2 if the enclosing loop * should skip to the next iteration (again assuming N is increasing). 
*/ -static int _dpll_test_fint(struct clk_hw_omap *clk, u8 n) +static int _dpll_test_fint(struct clk_hw_omap *clk, unsigned int n) { struct dpll_data *dd; long fint, fint_min, fint_max; diff --git a/arch/arm/mach-omap2/cm-regbits-34xx.h b/arch/arm/mach-omap2/cm-regbits-34xx.h index 04dab2f..ee6c784 100644 --- a/arch/arm/mach-omap2/cm-regbits-34xx.h +++ b/arch/arm/mach-omap2/cm-regbits-34xx.h @@ -26,11 +26,14 @@ #define OMAP3430_EN_WDT3_SHIFT 12 #define OMAP3430_CM_FCLKEN_IVA2_EN_IVA2_MASK (1 << 0) #define OMAP3430_CM_FCLKEN_IVA2_EN_IVA2_SHIFT 0 +#define OMAP3430_IVA2_DPLL_FREQSEL_SHIFT 4 #define OMAP3430_IVA2_DPLL_FREQSEL_MASK (0xf << 4) #define OMAP3430_EN_IVA2_DPLL_DRIFTGUARD_SHIFT 3 +#define OMAP3430_EN_IVA2_DPLL_SHIFT 0 #define OMAP3430_EN_IVA2_DPLL_MASK (0x7 << 0) #define OMAP3430_ST_IVA2_SHIFT 0 #define OMAP3430_ST_IVA2_CLK_MASK (1 << 0) +#define OMAP3430_AUTO_IVA2_DPLL_SHIFT 0 #define OMAP3430_AUTO_IVA2_DPLL_MASK (0x7 << 0) #define OMAP3430_IVA2_CLK_SRC_SHIFT 19 #define OMAP3430_IVA2_CLK_SRC_WIDTH 3 diff --git a/arch/arm/mach-omap2/cm33xx.h b/arch/arm/mach-omap2/cm33xx.h index 15a778c..bd24417 100644 --- a/arch/arm/mach-omap2/cm33xx.h +++ b/arch/arm/mach-omap2/cm33xx.h @@ -380,7 +380,7 @@ void am33xx_cm_clkdm_disable_hwsup(u16 inst, u16 cdoffs); void am33xx_cm_clkdm_force_sleep(u16 inst, u16 cdoffs); void am33xx_cm_clkdm_force_wakeup(u16 inst, u16 cdoffs); -#ifdef CONFIG_SOC_AM33XX +#if defined(CONFIG_SOC_AM33XX) || defined(CONFIG_SOC_AM43XX) extern int am33xx_cm_wait_module_idle(u16 inst, s16 cdoffs, u16 clkctrl_offs); extern void am33xx_cm_module_enable(u8 mode, u16 inst, s16 cdoffs, diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index a373d50..dc571f1 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -162,7 +162,8 @@ static inline void omap3xxx_restart(enum reboot_mode mode, const char *cmd) } #endif -#if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5) +#if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5) || \ + defined(CONFIG_SOC_DRA7XX) || defined(CONFIG_SOC_AM43XX) void omap44xx_restart(enum reboot_mode mode, const char *cmd); #else static inline void omap44xx_restart(enum reboot_mode mode, const char *cmd) @@ -248,7 +249,6 @@ static inline void __iomem *omap4_get_scu_base(void) } #endif -extern void __init gic_init_irq(void); extern void gic_dist_disable(void); extern void gic_dist_enable(void); extern bool gic_dist_disabled(void); diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c index 592ba0a..b6f8f34 100644 --- a/arch/arm/mach-omap2/devices.c +++ b/arch/arm/mach-omap2/devices.c @@ -297,33 +297,6 @@ static void omap_init_audio(void) static inline void omap_init_audio(void) {} #endif -#if defined(CONFIG_SND_OMAP_SOC_OMAP_HDMI) || \ - defined(CONFIG_SND_OMAP_SOC_OMAP_HDMI_MODULE) - -static struct platform_device omap_hdmi_audio = { - .name = "omap-hdmi-audio", - .id = -1, -}; - -static void __init omap_init_hdmi_audio(void) -{ - struct omap_hwmod *oh; - struct platform_device *pdev; - - oh = omap_hwmod_lookup("dss_hdmi"); - if (!oh) - return; - - pdev = omap_device_build("omap-hdmi-audio-dai", -1, oh, NULL, 0); - WARN(IS_ERR(pdev), - "Can't build omap_device for omap-hdmi-audio-dai.\n"); - - platform_device_register(&omap_hdmi_audio); -} -#else -static inline void omap_init_hdmi_audio(void) {} -#endif - #if defined(CONFIG_SPI_OMAP24XX) || defined(CONFIG_SPI_OMAP24XX_MODULE) #include <linux/platform_data/spi-omap2-mcspi.h> @@ -459,7 +432,6 @@ static int __init 
omap2_init_devices(void) */ omap_init_audio(); omap_init_camera(); - omap_init_hdmi_audio(); omap_init_mbox(); /* If dtb is there, the devices will be created dynamically */ if (!of_have_populated_dt()) { diff --git a/arch/arm/mach-omap2/dsp.c b/arch/arm/mach-omap2/dsp.c index b8208b4..f7492df 100644 --- a/arch/arm/mach-omap2/dsp.c +++ b/arch/arm/mach-omap2/dsp.c @@ -29,6 +29,7 @@ #ifdef CONFIG_TIDSPBRIDGE_DVFS #include "omap-pm.h" #endif +#include "soc.h" #include <linux/platform_data/dsp-omap.h> @@ -59,6 +60,9 @@ void __init omap_dsp_reserve_sdram_memblock(void) phys_addr_t size = CONFIG_TIDSPBRIDGE_MEMPOOL_SIZE; phys_addr_t paddr; + if (!cpu_is_omap34xx()) + return; + if (!size) return; @@ -83,6 +87,9 @@ static int __init omap_dsp_init(void) int err = -ENOMEM; struct omap_dsp_platform_data *pdata = &omap_dsp_pdata; + if (!cpu_is_omap34xx()) + return 0; + pdata->phys_mempool_base = omap_dsp_get_mempool_base(); if (pdata->phys_mempool_base) { @@ -115,6 +122,9 @@ module_init(omap_dsp_init); static void __exit omap_dsp_exit(void) { + if (!cpu_is_omap34xx()) + return; + platform_device_unregister(omap_dsp_pdev); } module_exit(omap_dsp_exit); diff --git a/arch/arm/mach-omap2/gpmc-nand.c b/arch/arm/mach-omap2/gpmc-nand.c index 17cd393..93914d2 100644 --- a/arch/arm/mach-omap2/gpmc-nand.c +++ b/arch/arm/mach-omap2/gpmc-nand.c @@ -50,6 +50,16 @@ static bool gpmc_hwecc_bch_capable(enum omap_ecc ecc_opt) soc_is_omap54xx() || soc_is_dra7xx()) return 1; + if (ecc_opt == OMAP_ECC_BCH4_CODE_HW_DETECTION_SW || + ecc_opt == OMAP_ECC_BCH8_CODE_HW_DETECTION_SW) { + if (cpu_is_omap24xx()) + return 0; + else if (cpu_is_omap3630() && (GET_OMAP_REVISION() == 0)) + return 0; + else + return 1; + } + /* OMAP3xxx do not have ELM engine, so cannot support ECC schemes * which require H/W based ECC error detection */ if ((cpu_is_omap34xx() || cpu_is_omap3630()) && @@ -57,14 +67,6 @@ static bool gpmc_hwecc_bch_capable(enum omap_ecc ecc_opt) (ecc_opt == OMAP_ECC_BCH8_CODE_HW))) return 0; - /* - * For now, assume 4-bit mode is only supported on OMAP3630 ES1.x, x>=1 - * and AM33xx derivates. Other chips may be added if confirmed to work. 
- */ - if ((ecc_opt == OMAP_ECC_BCH4_CODE_HW_DETECTION_SW) && - (!cpu_is_omap3630() || (GET_OMAP_REVISION() == 0))) - return 0; - /* legacy platforms support only HAM1 (1-bit Hamming) ECC scheme */ if (ecc_opt == OMAP_ECC_HAM1_CODE_HW) return 1; diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c index 2c0c281..8bc1338 100644 --- a/arch/arm/mach-omap2/gpmc.c +++ b/arch/arm/mach-omap2/gpmc.c @@ -1615,7 +1615,7 @@ static int gpmc_probe_dt(struct platform_device *pdev) return ret; } - for_each_child_of_node(pdev->dev.of_node, child) { + for_each_available_child_of_node(pdev->dev.of_node, child) { if (!child->name) continue; diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 43969da..d42022f 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -649,6 +649,18 @@ void __init dra7xxx_check_revision(void) } break; + case 0xb9bc: + switch (rev) { + case 0: + omap_revision = DRA722_REV_ES1_0; + break; + default: + /* If we have no new revisions */ + omap_revision = DRA722_REV_ES1_0; + break; + } + break; + default: /* Unknown default to latest silicon rev as default*/ pr_warn("%s: unknown idcode=0x%08x (hawkeye=0x%08x,rev=0x%d)\n", diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c index fd88ede..f62f753 100644 --- a/arch/arm/mach-omap2/mux.c +++ b/arch/arm/mach-omap2/mux.c @@ -183,8 +183,10 @@ static int __init _omap_mux_get_by_name(struct omap_mux_partition *partition, m0_entry = mux->muxnames[0]; /* First check for full name in mode0.muxmode format */ - if (mode0_len && strncmp(muxname, m0_entry, mode0_len)) - continue; + if (mode0_len) + if (strncmp(muxname, m0_entry, mode0_len) || + (strlen(m0_entry) != mode0_len)) + continue; /* Then check for muxmode only */ for (i = 0; i < OMAP_MUX_NR_MODES; i++) { diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 326cd98..a0fe747 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -102,26 +102,6 @@ void __init omap_barriers_init(void) {} #endif -void __init gic_init_irq(void) -{ - void __iomem *omap_irq_base; - - /* Static mapping, never released */ - gic_dist_base_addr = ioremap(OMAP44XX_GIC_DIST_BASE, SZ_4K); - BUG_ON(!gic_dist_base_addr); - - twd_base = ioremap(OMAP44XX_LOCAL_TWD_BASE, SZ_4K); - BUG_ON(!twd_base); - - /* Static mapping, never released */ - omap_irq_base = ioremap(OMAP44XX_GIC_CPU_BASE, SZ_512); - BUG_ON(!omap_irq_base); - - omap_wakeupgen_init(); - - gic_init(0, 29, gic_dist_base_addr, omap_irq_base); -} - void gic_dist_disable(void) { if (gic_dist_base_addr) @@ -188,6 +168,10 @@ static void omap4_l2c310_write_sec(unsigned long val, unsigned reg) smc_op = OMAP4_MON_L2X0_PREFETCH_INDEX; break; + case L310_POWER_CTRL: + pr_info_once("OMAP L2C310: ROM does not support power control setting\n"); + return; + default: WARN_ONCE(1, "OMAP L2C310: ignoring write to reg 0x%x\n", reg); return; diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index f7bb435..6c074f3 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -4251,9 +4251,9 @@ void __init omap_hwmod_init(void) soc_ops.enable_module = _omap4_enable_module; soc_ops.disable_module = _omap4_disable_module; soc_ops.wait_target_ready = _omap4_wait_target_ready; - soc_ops.assert_hardreset = _omap4_assert_hardreset; - soc_ops.deassert_hardreset = _omap4_deassert_hardreset; - soc_ops.is_hardreset_asserted = _omap4_is_hardreset_asserted; + soc_ops.assert_hardreset = 
_am33xx_assert_hardreset; + soc_ops.deassert_hardreset = _am33xx_deassert_hardreset; + soc_ops.is_hardreset_asserted = _am33xx_is_hardreset_asserted; soc_ops.init_clkdm = _init_clkdm; } else if (soc_is_am33xx()) { soc_ops.enable_module = _am33xx_enable_module; diff --git a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c index 290213f..1103aa0 100644 --- a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c @@ -2020,6 +2020,77 @@ static struct omap_hwmod omap54xx_wd_timer2_hwmod = { }, }; +/* + * 'ocp2scp' class + * bridge to transform ocp interface protocol to scp (serial control port) + * protocol + */ +/* ocp2scp3 */ +static struct omap_hwmod omap54xx_ocp2scp3_hwmod; +/* l4_cfg -> ocp2scp3 */ +static struct omap_hwmod_ocp_if omap54xx_l4_cfg__ocp2scp3 = { + .master = &omap54xx_l4_cfg_hwmod, + .slave = &omap54xx_ocp2scp3_hwmod, + .clk = "l4_root_clk_div", + .user = OCP_USER_MPU | OCP_USER_SDMA, +}; + +static struct omap_hwmod omap54xx_ocp2scp3_hwmod = { + .name = "ocp2scp3", + .class = &omap54xx_ocp2scp_hwmod_class, + .clkdm_name = "l3init_clkdm", + .prcm = { + .omap4 = { + .clkctrl_offs = OMAP54XX_CM_L3INIT_OCP2SCP3_CLKCTRL_OFFSET, + .context_offs = OMAP54XX_RM_L3INIT_OCP2SCP3_CONTEXT_OFFSET, + .modulemode = MODULEMODE_HWCTRL, + }, + }, +}; + +/* + * 'sata' class + * sata: serial ata interface gen2 compliant ( 1 rx/ 1 tx) + */ + +static struct omap_hwmod_class_sysconfig omap54xx_sata_sysc = { + .sysc_offs = 0x0000, + .sysc_flags = (SYSC_HAS_MIDLEMODE | SYSC_HAS_SIDLEMODE), + .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART | + SIDLE_SMART_WKUP | MSTANDBY_FORCE | MSTANDBY_NO | + MSTANDBY_SMART | MSTANDBY_SMART_WKUP), + .sysc_fields = &omap_hwmod_sysc_type2, +}; + +static struct omap_hwmod_class omap54xx_sata_hwmod_class = { + .name = "sata", + .sysc = &omap54xx_sata_sysc, +}; + +/* sata */ +static struct omap_hwmod omap54xx_sata_hwmod = { + .name = "sata", + .class = &omap54xx_sata_hwmod_class, + .clkdm_name = "l3init_clkdm", + .flags = HWMOD_SWSUP_SIDLE | HWMOD_SWSUP_MSTANDBY, + .main_clk = "func_48m_fclk", + .mpu_rt_idx = 1, + .prcm = { + .omap4 = { + .clkctrl_offs = OMAP54XX_CM_L3INIT_SATA_CLKCTRL_OFFSET, + .context_offs = OMAP54XX_RM_L3INIT_SATA_CONTEXT_OFFSET, + .modulemode = MODULEMODE_SWCTRL, + }, + }, +}; + +/* l4_cfg -> sata */ +static struct omap_hwmod_ocp_if omap54xx_l4_cfg__sata = { + .master = &omap54xx_l4_cfg_hwmod, + .slave = &omap54xx_sata_hwmod, + .clk = "l3_iclk_div", + .user = OCP_USER_MPU | OCP_USER_SDMA, +}; /* * Interfaces @@ -2765,6 +2836,8 @@ static struct omap_hwmod_ocp_if *omap54xx_hwmod_ocp_ifs[] __initdata = { &omap54xx_l4_cfg__usb_tll_hs, &omap54xx_l4_cfg__usb_otg_ss, &omap54xx_l4_wkup__wd_timer2, + &omap54xx_l4_cfg__ocp2scp3, + &omap54xx_l4_cfg__sata, NULL, }; diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c index 20b4398..284324f 100644 --- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c @@ -1268,9 +1268,6 @@ static struct omap_hwmod_class dra7xx_sata_hwmod_class = { }; /* sata */ -static struct omap_hwmod_opt_clk sata_opt_clks[] = { - { .role = "ref_clk", .clk = "sata_ref_clk" }, -}; static struct omap_hwmod dra7xx_sata_hwmod = { .name = "sata", @@ -1278,6 +1275,7 @@ static struct omap_hwmod dra7xx_sata_hwmod = { .clkdm_name = "l3init_clkdm", .flags = HWMOD_SWSUP_SIDLE | HWMOD_SWSUP_MSTANDBY, .main_clk = "func_48m_fclk", + .mpu_rt_idx = 1, .prcm = { .omap4 = { .clkctrl_offs = 
DRA7XX_CM_L3INIT_SATA_CLKCTRL_OFFSET, @@ -1285,8 +1283,6 @@ static struct omap_hwmod dra7xx_sata_hwmod = { .modulemode = MODULEMODE_SWCTRL, }, }, - .opt_clks = sata_opt_clks, - .opt_clks_cnt = ARRAY_SIZE(sata_opt_clks), }; /* @@ -1731,8 +1727,20 @@ static struct omap_hwmod dra7xx_uart6_hwmod = { * */ +static struct omap_hwmod_class_sysconfig dra7xx_usb_otg_ss_sysc = { + .rev_offs = 0x0000, + .sysc_offs = 0x0010, + .sysc_flags = (SYSC_HAS_DMADISABLE | SYSC_HAS_MIDLEMODE | + SYSC_HAS_SIDLEMODE), + .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART | + SIDLE_SMART_WKUP | MSTANDBY_FORCE | MSTANDBY_NO | + MSTANDBY_SMART | MSTANDBY_SMART_WKUP), + .sysc_fields = &omap_hwmod_sysc_type2, +}; + static struct omap_hwmod_class dra7xx_usb_otg_ss_hwmod_class = { .name = "usb_otg_ss", + .sysc = &dra7xx_usb_otg_ss_sysc, }; /* usb_otg_ss1 */ diff --git a/arch/arm/mach-omap2/prm-regbits-34xx.h b/arch/arm/mach-omap2/prm-regbits-34xx.h index 106132d..cbefbd7 100644 --- a/arch/arm/mach-omap2/prm-regbits-34xx.h +++ b/arch/arm/mach-omap2/prm-regbits-34xx.h @@ -35,6 +35,8 @@ #define OMAP3430_LOGICSTATEST_MASK (1 << 2) #define OMAP3430_LASTLOGICSTATEENTERED_MASK (1 << 2) #define OMAP3430_LASTPOWERSTATEENTERED_MASK (0x3 << 0) +#define OMAP3430_GRPSEL_MCBSP5_MASK (1 << 10) +#define OMAP3430_GRPSEL_MCBSP1_MASK (1 << 9) #define OMAP3630_GRPSEL_UART4_MASK (1 << 18) #define OMAP3430_GRPSEL_GPIO6_MASK (1 << 17) #define OMAP3430_GRPSEL_GPIO5_MASK (1 << 16) @@ -42,6 +44,10 @@ #define OMAP3430_GRPSEL_GPIO3_MASK (1 << 14) #define OMAP3430_GRPSEL_GPIO2_MASK (1 << 13) #define OMAP3430_GRPSEL_UART3_MASK (1 << 11) +#define OMAP3430_GRPSEL_GPT8_MASK (1 << 9) +#define OMAP3430_GRPSEL_GPT7_MASK (1 << 8) +#define OMAP3430_GRPSEL_GPT6_MASK (1 << 7) +#define OMAP3430_GRPSEL_GPT5_MASK (1 << 6) #define OMAP3430_GRPSEL_MCBSP4_MASK (1 << 2) #define OMAP3430_GRPSEL_MCBSP3_MASK (1 << 1) #define OMAP3430_GRPSEL_MCBSP2_MASK (1 << 0) diff --git a/arch/arm/mach-omap2/soc.h b/arch/arm/mach-omap2/soc.h index de2a34c..01ca808 100644 --- a/arch/arm/mach-omap2/soc.h +++ b/arch/arm/mach-omap2/soc.h @@ -462,6 +462,7 @@ IS_OMAP_TYPE(3430, 0x3430) #define DRA7XX_CLASS 0x07000000 #define DRA752_REV_ES1_0 (DRA7XX_CLASS | (0x52 << 16) | (0x10 << 8)) #define DRA752_REV_ES1_1 (DRA7XX_CLASS | (0x52 << 16) | (0x11 << 8)) +#define DRA722_REV_ES1_0 (DRA7XX_CLASS | (0x22 << 16) | (0x10 << 8)) void omap2xxx_check_revision(void); void omap3xxx_check_revision(void); diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 3f9587b..b608508 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -12,8 +12,81 @@ #include <linux/clk-provider.h> #include <linux/clocksource.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> +#include <linux/io.h> +#include <linux/reboot.h> #include <asm/mach/arch.h> +#include <asm/mach/map.h> +#include <asm/system_misc.h> + +#define SUN4I_WATCHDOG_CTRL_REG 0x00 +#define SUN4I_WATCHDOG_CTRL_RESTART BIT(0) +#define SUN4I_WATCHDOG_MODE_REG 0x04 +#define SUN4I_WATCHDOG_MODE_ENABLE BIT(0) +#define SUN4I_WATCHDOG_MODE_RESET_ENABLE BIT(1) + +#define SUN6I_WATCHDOG1_IRQ_REG 0x00 +#define SUN6I_WATCHDOG1_CTRL_REG 0x10 +#define SUN6I_WATCHDOG1_CTRL_RESTART BIT(0) +#define SUN6I_WATCHDOG1_CONFIG_REG 0x14 +#define SUN6I_WATCHDOG1_CONFIG_RESTART BIT(0) +#define SUN6I_WATCHDOG1_CONFIG_IRQ BIT(1) +#define SUN6I_WATCHDOG1_MODE_REG 0x18 +#define SUN6I_WATCHDOG1_MODE_ENABLE BIT(0) + +static void 
__iomem *wdt_base; + +static void sun4i_restart(enum reboot_mode mode, const char *cmd) +{ + if (!wdt_base) + return; + + /* Enable timer and set reset bit in the watchdog */ + writel(SUN4I_WATCHDOG_MODE_ENABLE | SUN4I_WATCHDOG_MODE_RESET_ENABLE, + wdt_base + SUN4I_WATCHDOG_MODE_REG); + + /* + * Restart the watchdog. The default (and lowest) interval + * value for the watchdog is 0.5s. + */ + writel(SUN4I_WATCHDOG_CTRL_RESTART, wdt_base + SUN4I_WATCHDOG_CTRL_REG); + + while (1) { + mdelay(5); + writel(SUN4I_WATCHDOG_MODE_ENABLE | SUN4I_WATCHDOG_MODE_RESET_ENABLE, + wdt_base + SUN4I_WATCHDOG_MODE_REG); + } +} + +static struct of_device_id sunxi_restart_ids[] = { + { .compatible = "allwinner,sun4i-a10-wdt" }, + { /*sentinel*/ } +}; + +static void sunxi_setup_restart(void) +{ + struct device_node *np; + + np = of_find_matching_node(NULL, sunxi_restart_ids); + if (WARN(!np, "unable to setup watchdog restart")) + return; + + wdt_base = of_iomap(np, 0); + WARN(!wdt_base, "failed to map watchdog base address"); +} + +static void __init sunxi_dt_init(void) +{ + sunxi_setup_restart(); + + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); +} static const char * const sunxi_board_dt_compat[] = { "allwinner,sun4i-a10", @@ -23,7 +96,9 @@ static const char * const sunxi_board_dt_compat[] = { }; DT_MACHINE_START(SUNXI_DT, "Allwinner A1X (Device Tree)") + .init_machine = sunxi_dt_init, .dt_compat = sunxi_board_dt_compat, + .restart = sun4i_restart, MACHINE_END static const char * const sun6i_board_dt_compat[] = { @@ -51,5 +126,7 @@ static const char * const sun7i_board_dt_compat[] = { }; DT_MACHINE_START(SUN7I_DT, "Allwinner sun7i (A20) Family") + .init_machine = sunxi_dt_init, .dt_compat = sun7i_board_dt_compat, + .restart = sun4i_restart, MACHINE_END diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 076172b..7c3fb41 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -664,7 +664,7 @@ static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, v static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) { - unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_PART_MASK; + unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK; bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9; if (rev >= L310_CACHE_ID_RTL_R2P0) { diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 4c88935..1f88db0 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -461,12 +461,21 @@ void __init dma_contiguous_remap(void) map.type = MT_MEMORY_DMA_READY; /* - * Clear previous low-memory mapping + * Clear previous low-memory mapping to ensure that the + * TLB does not see any conflicting entries, then flush + * the TLB of the old entries before creating new mappings. + * + * This ensures that any speculatively loaded TLB entries + * (even though they may be rare) can not cause any problems, + * and ensures that this code is architecturally compliant. 
*/ for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); + flush_tlb_kernel_range(__phys_to_virt(start), + __phys_to_virt(end)); + iotable_init(&map, 1); } } diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 8e0e52e..c447ec7 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -9,6 +9,11 @@ #include <asm/sections.h> #include <asm/system_info.h> +/* + * Note: accesses outside of the kernel image and the identity map area + * are not supported on any CPU using the idmap tables as its current + * page tables. + */ pgd_t *idmap_pgd; phys_addr_t (*arch_virt_to_idmap) (unsigned long x); @@ -25,6 +30,13 @@ static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, pr_warning("Failed to allocate identity pmd.\n"); return; } + /* + * Copy the original PMD to ensure that the PMD entries for + * the kernel image are preserved. + */ + if (!pud_none(*pud)) + memcpy(pmd, pmd_offset(pud, 0), + PTRS_PER_PMD * sizeof(pmd_t)); pud_populate(&init_mm, pud, pmd); pmd += pmd_index(addr); } else diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index ab14b79..6e3ba8d 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1406,8 +1406,8 @@ void __init early_paging_init(const struct machine_desc *mdesc, return; /* remap kernel code and data */ - map_start = init_mm.start_code; - map_end = init_mm.brk; + map_start = init_mm.start_code & PMD_MASK; + map_end = ALIGN(init_mm.brk, PMD_SIZE); /* get a handle on things... */ pgd0 = pgd_offset_k(0); @@ -1442,7 +1442,7 @@ void __init early_paging_init(const struct machine_desc *mdesc, } /* remap pmds for kernel mapping */ - phys = __pa(map_start) & PMD_MASK; + phys = __pa(map_start); do { *pmdk++ = __pmd(phys | pmdprot); phys += PMD_SIZE; diff --git a/arch/arm/xen/grant-table.c b/arch/arm/xen/grant-table.c index 859a9bb..91cf08b 100644 --- a/arch/arm/xen/grant-table.c +++ b/arch/arm/xen/grant-table.c @@ -51,3 +51,8 @@ int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, { return -ENOSYS; } + +int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status) +{ + return 0; +} diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a474de34..839f48c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -4,6 +4,7 @@ config ARM64 select ARCH_HAS_OPP select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 60f2f4c..79cd911 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -106,7 +106,7 @@ static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -128,7 +128,7 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, rounds, blocks, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } 
kernel_neon_end(); return err; @@ -151,7 +151,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -174,7 +174,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -243,7 +243,7 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_enc, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); @@ -267,7 +267,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_dec, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 993bce5..902eb70 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -56,6 +56,8 @@ #define TASK_SIZE_32 UL(0x100000000) #define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ TASK_SIZE_32 : TASK_SIZE_64) +#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? 
\ + TASK_SIZE_32 : TASK_SIZE_64) #else #define TASK_SIZE TASK_SIZE_64 #endif /* CONFIG_COMPAT */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 5797020..e0ccceb 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -292,7 +292,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_SECT) -#ifdef ARM64_64K_PAGES +#ifdef CONFIG_ARM64_64K_PAGES #define pud_sect(pud) (0) #else #define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index a429b59..501000f 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -21,6 +21,10 @@ #include <uapi/asm/ptrace.h> +/* Current Exception Level values, as contained in CurrentEL */ +#define CurrentEL_EL1 (1 << 2) +#define CurrentEL_EL2 (2 << 2) + /* AArch32-specific ptrace requests */ #define COMPAT_PTRACE_GETREGS 12 #define COMPAT_PTRACE_SETREGS 13 diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 66716c9..619b1dd 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -78,8 +78,7 @@ ENTRY(efi_stub_entry) /* Turn off Dcache and MMU */ mrs x0, CurrentEL - cmp x0, #PSR_MODE_EL2t - ccmp x0, #PSR_MODE_EL2h, #0x4, ne + cmp x0, #CurrentEL_EL2 b.ne 1f mrs x0, sctlr_el2 bic x0, x0, #1 << 0 // clear SCTLR.M diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c index 60e98a63..e786e6c 100644 --- a/arch/arm64/kernel/efi-stub.c +++ b/arch/arm64/kernel/efi-stub.c @@ -12,8 +12,6 @@ #include <linux/efi.h> #include <linux/libfdt.h> #include <asm/sections.h> -#include <generated/compile.h> -#include <generated/utsrelease.h> /* * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a96d3a6..a2c1195 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -270,8 +270,7 @@ ENDPROC(stext) */ ENTRY(el2_setup) mrs x0, CurrentEL - cmp x0, #PSR_MODE_EL2t - ccmp x0, #PSR_MODE_EL2h, #0x4, ne + cmp x0, #CurrentEL_EL2 b.ne 1f mrs x0, sctlr_el2 CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 9aecbac..13bbc3be 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -27,8 +27,10 @@ void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) copy_page(kto, kfrom); __flush_dcache_area(kto, PAGE_SIZE); } +EXPORT_SYMBOL_GPL(__cpu_copy_user_page); void __cpu_clear_user_page(void *kaddr, unsigned long vaddr) { clear_page(kaddr); } +EXPORT_SYMBOL_GPL(__cpu_clear_user_page); diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index e4193e3..0d64089 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -79,7 +79,8 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) return; if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { - __flush_dcache_area(page_address(page), PAGE_SIZE); + __flush_dcache_area(page_address(page), + PAGE_SIZE << compound_order(page)); __flush_icache_all(); } else if (icache_is_aivivt()) { __flush_icache_all(); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f43db8a..e90c542 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -60,6 +60,17 @@ static int __init early_initrd(char *p) early_param("initrd", early_initrd); #endif +/* + * Return the maximum physical 
address for ZONE_DMA (DMA_BIT_MASK(32)). It + * currently assumes that for memory starting above 4G, 32-bit devices will + * use a DMA offset. + */ +static phys_addr_t max_zone_dma_phys(void) +{ + phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); + return min(offset + (1ULL << 32), memblock_end_of_DRAM()); +} + static void __init zone_sizes_init(unsigned long min, unsigned long max) { struct memblock_region *reg; @@ -70,9 +81,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) /* 4GB maximum for 32-bit only capable devices */ if (IS_ENABLED(CONFIG_ZONE_DMA)) { - unsigned long max_dma_phys = - (unsigned long)(dma_to_phys(NULL, DMA_BIT_MASK(32)) + 1); - max_dma = max(min, min(max, max_dma_phys >> PAGE_SHIFT)); + max_dma = PFN_DOWN(max_zone_dma_phys()); zone_size[ZONE_DMA] = max_dma - min; } zone_size[ZONE_NORMAL] = max - max_dma; @@ -146,7 +155,7 @@ void __init arm64_memblock_init(void) /* 4GB maximum for 32-bit only capable devices */ if (IS_ENABLED(CONFIG_ZONE_DMA)) - dma_phys_limit = dma_to_phys(NULL, DMA_BIT_MASK(32)) + 1; + dma_phys_limit = max_zone_dma_phys(); dma_contiguous_reserve(dma_phys_limit); memblock_allow_resize(); diff --git a/arch/blackfin/configs/BF609-EZKIT_defconfig b/arch/blackfin/configs/BF609-EZKIT_defconfig index a7e9bfd..fcec5ce 100644 --- a/arch/blackfin/configs/BF609-EZKIT_defconfig +++ b/arch/blackfin/configs/BF609-EZKIT_defconfig @@ -102,7 +102,7 @@ CONFIG_I2C_CHARDEV=y CONFIG_I2C_BLACKFIN_TWI=y CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 CONFIG_SPI=y -CONFIG_SPI_BFIN_V3=y +CONFIG_SPI_ADI_V3=y CONFIG_GPIOLIB=y CONFIG_GPIO_SYSFS=y # CONFIG_HWMON is not set diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index ba35864..c9eec84 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -145,7 +145,7 @@ SECTIONS .text_l1 L1_CODE_START : AT(LOADADDR(.exit.data) + SIZEOF(.exit.data)) #else - .init.data : AT(__data_lma + __data_len) + .init.data : AT(__data_lma + __data_len + 32) { __sinitdata = .; INIT_DATA diff --git a/arch/blackfin/mach-bf533/boards/blackstamp.c b/arch/blackfin/mach-bf533/boards/blackstamp.c index 63b0e4f..0ccf0cf 100644 --- a/arch/blackfin/mach-bf533/boards/blackstamp.c +++ b/arch/blackfin/mach-bf533/boards/blackstamp.c @@ -20,6 +20,7 @@ #include <linux/spi/spi.h> #include <linux/spi/flash.h> #include <linux/irq.h> +#include <linux/gpio.h> #include <linux/i2c.h> #include <asm/dma.h> #include <asm/bfin5xx_spi.h> diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537e.c b/arch/blackfin/mach-bf537/boards/cm_bf537e.c index c65c6db..1e7290e 100644 --- a/arch/blackfin/mach-bf537/boards/cm_bf537e.c +++ b/arch/blackfin/mach-bf537/boards/cm_bf537e.c @@ -21,6 +21,7 @@ #endif #include <linux/ata_platform.h> #include <linux/irq.h> +#include <linux/gpio.h> #include <asm/dma.h> #include <asm/bfin5xx_spi.h> #include <asm/portmux.h> diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537u.c b/arch/blackfin/mach-bf537/boards/cm_bf537u.c index af58454..c7495dc 100644 --- a/arch/blackfin/mach-bf537/boards/cm_bf537u.c +++ b/arch/blackfin/mach-bf537/boards/cm_bf537u.c @@ -21,6 +21,7 @@ #endif #include <linux/ata_platform.h> #include <linux/irq.h> +#include <linux/gpio.h> #include <asm/dma.h> #include <asm/bfin5xx_spi.h> #include <asm/portmux.h> diff --git a/arch/blackfin/mach-bf537/boards/tcm_bf537.c b/arch/blackfin/mach-bf537/boards/tcm_bf537.c index a021122..6b988ad 100644 --- a/arch/blackfin/mach-bf537/boards/tcm_bf537.c +++ 
b/arch/blackfin/mach-bf537/boards/tcm_bf537.c @@ -21,6 +21,7 @@ #endif #include <linux/ata_platform.h> #include <linux/irq.h> +#include <linux/gpio.h> #include <asm/dma.h> #include <asm/bfin5xx_spi.h> #include <asm/portmux.h> diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c index 90138e6..1fe7ff2 100644 --- a/arch/blackfin/mach-bf548/boards/ezkit.c +++ b/arch/blackfin/mach-bf548/boards/ezkit.c @@ -2118,7 +2118,7 @@ static struct pinctrl_map __initdata bfin_pinmux_map[] = { PIN_MAP_MUX_GROUP_DEFAULT("bfin-rotary", "pinctrl-adi2.0", NULL, "rotary"), PIN_MAP_MUX_GROUP_DEFAULT("bfin_can.0", "pinctrl-adi2.0", NULL, "can0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin_can.1", "pinctrl-adi2.0", NULL, "can1"), - PIN_MAP_MUX_GROUP_DEFAULT("bf54x-lq043", "pinctrl-adi2.0", NULL, "ppi0_24b"), + PIN_MAP_MUX_GROUP_DEFAULT("bf54x-lq043", "pinctrl-adi2.0", "ppi0_24bgrp", "ppi0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-i2s.0", "pinctrl-adi2.0", NULL, "sport0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-tdm.0", "pinctrl-adi2.0", NULL, "sport0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-ac97.0", "pinctrl-adi2.0", NULL, "sport0"), @@ -2140,7 +2140,9 @@ static struct pinctrl_map __initdata bfin_pinmux_map[] = { PIN_MAP_MUX_GROUP_DEFAULT("pata-bf54x", "pinctrl-adi2.0", NULL, "atapi_alter"), #endif PIN_MAP_MUX_GROUP_DEFAULT("bf5xx-nand.0", "pinctrl-adi2.0", NULL, "nfc0"), - PIN_MAP_MUX_GROUP_DEFAULT("bf54x-keys", "pinctrl-adi2.0", NULL, "keys_4x4"), + PIN_MAP_MUX_GROUP_DEFAULT("bf54x-keys", "pinctrl-adi2.0", "keys_4x4grp", "keys"), + PIN_MAP_MUX_GROUP("bf54x-keys", "4bit", "pinctrl-adi2.0", "keys_4x4grp", "keys"), + PIN_MAP_MUX_GROUP("bf54x-keys", "8bit", "pinctrl-adi2.0", "keys_8x8grp", "keys"), }; static int __init ezkit_init(void) diff --git a/arch/blackfin/mach-bf561/boards/acvilon.c b/arch/blackfin/mach-bf561/boards/acvilon.c index 430b16d..6ab9515 100644 --- a/arch/blackfin/mach-bf561/boards/acvilon.c +++ b/arch/blackfin/mach-bf561/boards/acvilon.c @@ -44,6 +44,7 @@ #include <linux/spi/flash.h> #include <linux/irq.h> #include <linux/interrupt.h> +#include <linux/gpio.h> #include <linux/jiffies.h> #include <linux/i2c-pca-platform.h> #include <linux/delay.h> diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c index 9f777df..e862f78 100644 --- a/arch/blackfin/mach-bf561/boards/cm_bf561.c +++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c @@ -18,6 +18,7 @@ #endif #include <linux/ata_platform.h> #include <linux/irq.h> +#include <linux/gpio.h> #include <asm/dma.h> #include <asm/bfin5xx_spi.h> #include <asm/portmux.h> diff --git a/arch/blackfin/mach-bf561/boards/ezkit.c b/arch/blackfin/mach-bf561/boards/ezkit.c index 88dee43..2de71e8 100644 --- a/arch/blackfin/mach-bf561/boards/ezkit.c +++ b/arch/blackfin/mach-bf561/boards/ezkit.c @@ -14,6 +14,7 @@ #include <linux/spi/spi.h> #include <linux/irq.h> #include <linux/interrupt.h> +#include <linux/gpio.h> #include <linux/delay.h> #include <asm/dma.h> #include <asm/bfin5xx_spi.h> diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c index 1ba4600..e2c0b02 100644 --- a/arch/blackfin/mach-bf609/boards/ezkit.c +++ b/arch/blackfin/mach-bf609/boards/ezkit.c @@ -698,8 +698,6 @@ int bf609_nor_flash_init(struct platform_device *pdev) { #define CONFIG_SMC_GCTL_VAL 0x00000010 - if (!devm_pinctrl_get_select_default(&pdev->dev)) - return -EBUSY; bfin_write32(SMC_GCTL, CONFIG_SMC_GCTL_VAL); bfin_write32(SMC_B0CTL, 0x01002011); bfin_write32(SMC_B0TIM, 0x08170977); @@ -709,7 +707,6 @@ int 
bf609_nor_flash_init(struct platform_device *pdev) void bf609_nor_flash_exit(struct platform_device *pdev) { - devm_pinctrl_put(pdev->dev.pins->p); bfin_write32(SMC_GCTL, 0); } @@ -2058,15 +2055,14 @@ static struct pinctrl_map __initdata bfin_pinmux_map[] = { PIN_MAP_MUX_GROUP_DEFAULT("bfin-rotary", "pinctrl-adi2.0", NULL, "rotary"), PIN_MAP_MUX_GROUP_DEFAULT("bfin_can.0", "pinctrl-adi2.0", NULL, "can0"), PIN_MAP_MUX_GROUP_DEFAULT("physmap-flash.0", "pinctrl-adi2.0", NULL, "smc0"), - PIN_MAP_MUX_GROUP_DEFAULT("bf609_nl8048.2", "pinctrl-adi2.0", NULL, "ppi2_16b"), - PIN_MAP_MUX_GROUP_DEFAULT("bfin_display.0", "pinctrl-adi2.0", NULL, "ppi0_16b"), -#if IS_ENABLED(CONFIG_VIDEO_MT9M114) - PIN_MAP_MUX_GROUP_DEFAULT("bfin_capture.0", "pinctrl-adi2.0", NULL, "ppi0_8b"), -#elif IS_ENABLED(CONFIG_VIDEO_VS6624) - PIN_MAP_MUX_GROUP_DEFAULT("bfin_capture.0", "pinctrl-adi2.0", NULL, "ppi0_16b"), -#else - PIN_MAP_MUX_GROUP_DEFAULT("bfin_capture.0", "pinctrl-adi2.0", NULL, "ppi0_24b"), -#endif + PIN_MAP_MUX_GROUP_DEFAULT("bf609_nl8048.2", "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"), + PIN_MAP_MUX_GROUP("bfin_display.0", "8bit", "pinctrl-adi2.0", "ppi2_8bgrp", "ppi2"), + PIN_MAP_MUX_GROUP_DEFAULT("bfin_display.0", "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"), + PIN_MAP_MUX_GROUP("bfin_display.0", "16bit", "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"), + PIN_MAP_MUX_GROUP("bfin_capture.0", "8bit", "pinctrl-adi2.0", "ppi0_8bgrp", "ppi0"), + PIN_MAP_MUX_GROUP_DEFAULT("bfin_capture.0", "pinctrl-adi2.0", "ppi0_16bgrp", "ppi0"), + PIN_MAP_MUX_GROUP("bfin_capture.0", "16bit", "pinctrl-adi2.0", "ppi0_16bgrp", "ppi0"), + PIN_MAP_MUX_GROUP("bfin_capture.0", "24bit", "pinctrl-adi2.0", "ppi0_24bgrp", "ppi0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-i2s.0", "pinctrl-adi2.0", NULL, "sport0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-tdm.0", "pinctrl-adi2.0", NULL, "sport0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-i2s.1", "pinctrl-adi2.0", NULL, "sport1"), diff --git a/arch/blackfin/mach-bf609/include/mach/pm.h b/arch/blackfin/mach-bf609/include/mach/pm.h index 3ca0fb9..a1efd93 100644 --- a/arch/blackfin/mach-bf609/include/mach/pm.h +++ b/arch/blackfin/mach-bf609/include/mach/pm.h @@ -10,6 +10,7 @@ #define __MACH_BF609_PM_H__ #include <linux/suspend.h> +#include <linux/platform_device.h> extern int bfin609_pm_enter(suspend_state_t state); extern int bf609_pm_prepare(void); @@ -19,6 +20,6 @@ void bf609_hibernate(void); void bfin_sec_raise_irq(unsigned int sid); void coreb_enable(void); -int bf609_nor_flash_init(void); -void bf609_nor_flash_exit(void); +int bf609_nor_flash_init(struct platform_device *pdev); +void bf609_nor_flash_exit(struct platform_device *pdev); #endif diff --git a/arch/blackfin/mach-bf609/pm.c b/arch/blackfin/mach-bf609/pm.c index 0cdd695..b1bfcf4 100644 --- a/arch/blackfin/mach-bf609/pm.c +++ b/arch/blackfin/mach-bf609/pm.c @@ -291,13 +291,13 @@ static struct bfin_cpu_pm_fns bf609_cpu_pm = { #if defined(CONFIG_MTD_PHYSMAP) || defined(CONFIG_MTD_PHYSMAP_MODULE) static int smc_pm_syscore_suspend(void) { - bf609_nor_flash_exit(); + bf609_nor_flash_exit(NULL); return 0; } static void smc_pm_syscore_resume(void) { - bf609_nor_flash_init(); + bf609_nor_flash_init(NULL); } static struct syscore_ops smc_pm_syscore_ops = { diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index 867b7ce..1f94784 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -1208,8 +1208,6 @@ int __init init_arch_irq(void) 
bfin_sec_set_priority(CONFIG_SEC_IRQ_PRIORITY_LEVELS, sec_int_priority); - bfin_sec_set_priority(CONFIG_SEC_IRQ_PRIORITY_LEVELS, sec_int_priority); - /* Enable interrupts IVG7-15 */ bfin_irq_flags |= IMASK_IVG15 | IMASK_IVG14 | IMASK_IVG13 | IMASK_IVG12 | IMASK_IVG11 | diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c index 1fe9aa5..ec73b2c 100644 --- a/arch/ia64/pci/fixup.c +++ b/arch/ia64/pci/fixup.c @@ -6,6 +6,7 @@ #include <linux/pci.h> #include <linux/init.h> #include <linux/vgaarb.h> +#include <linux/screen_info.h> #include <asm/machvec.h> @@ -37,6 +38,27 @@ static void pci_fixup_video(struct pci_dev *pdev) return; /* Maybe, this machine supports legacy memory map. */ + if (!vga_default_device()) { + resource_size_t start, end; + int i; + + /* Does firmware framebuffer belong to us? */ + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) + continue; + + start = pci_resource_start(pdev, i); + end = pci_resource_end(pdev, i); + + if (!start || !end) + continue; + + if (screen_info.lfb_base >= start && + (screen_info.lfb_base + screen_info.lfb_size) < end) + vga_set_default_device(pdev); + } + } + /* Is VGA routed to us? */ bus = pdev->bus; while (bus) { diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S index dbb118e..a5478845 100644 --- a/arch/m68k/kernel/head.S +++ b/arch/m68k/kernel/head.S @@ -921,7 +921,8 @@ L(nocon): jls 1f lsrl #1,%d1 1: - movel %d1,m68k_init_mapped_size + lea %pc@(m68k_init_mapped_size),%a0 + movel %d1,%a0@ mmu_map #PAGE_OFFSET,%pc@(L(phys_kernel_start)),%d1,\ %pc@(m68k_supervisor_cachemode) diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c index 958f1ad..3857737 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -11,6 +11,7 @@ */ #include <linux/errno.h> +#include <linux/export.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -30,6 +31,7 @@ unsigned long (*mach_random_get_entropy)(void); +EXPORT_SYMBOL_GPL(mach_random_get_entropy); /* diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index cd5e4f5..f3c56a1 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -384,6 +384,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kfree(vcpu->arch.guest_ebase); kfree(vcpu->arch.kseg0_commpage); + kfree(vcpu); } void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) diff --git a/arch/parisc/include/uapi/asm/signal.h b/arch/parisc/include/uapi/asm/signal.h index a2fa2971..f5645d6 100644 --- a/arch/parisc/include/uapi/asm/signal.h +++ b/arch/parisc/include/uapi/asm/signal.h @@ -69,8 +69,6 @@ #define SA_NOMASK SA_NODEFER #define SA_ONESHOT SA_RESETHAND -#define SA_RESTORER 0x04000000 /* obsolete -- ignored */ - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/parisc/kernel/hardware.c b/arch/parisc/kernel/hardware.c index 608716f..af3bc35 100644 --- a/arch/parisc/kernel/hardware.c +++ b/arch/parisc/kernel/hardware.c @@ -1210,7 +1210,8 @@ static struct hp_hardware hp_hardware_list[] = { {HPHW_FIO, 0x004, 0x00320, 0x0, "Metheus Frame Buffer"}, {HPHW_FIO, 0x004, 0x00340, 0x0, "BARCO CX4500 VME Grphx Cnsl"}, {HPHW_FIO, 0x004, 0x00360, 0x0, "Hughes TOG VME FDDI"}, - {HPHW_FIO, 0x076, 0x000AD, 0x00, "Crestone Peak RS-232"}, + {HPHW_FIO, 0x076, 0x000AD, 0x0, "Crestone Peak Core RS-232"}, + {HPHW_FIO, 0x077, 0x000AD, 0x0, "Crestone Peak Fast? 
Core RS-232"}, {HPHW_IOA, 0x185, 0x0000B, 0x00, "Java BC Summit Port"}, {HPHW_IOA, 0x1FF, 0x0000B, 0x00, "Hitachi Ghostview Summit Port"}, {HPHW_IOA, 0x580, 0x0000B, 0x10, "U2-IOA BC Runway Port"}, diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index bb9f3b6..93c1963 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -4,6 +4,7 @@ * Copyright (C) 2000-2001 Hewlett Packard Company * Copyright (C) 2000 John Marvin * Copyright (C) 2001 Matthew Wilcox + * Copyright (C) 2014 Helge Deller <deller@gmx.de> * * These routines maintain argument size conversion between 32bit and 64bit * environment. Based heavily on sys_ia32.c and sys_sparc32.c. @@ -11,44 +12,8 @@ #include <linux/compat.h> #include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/file.h> -#include <linux/signal.h> -#include <linux/resource.h> -#include <linux/times.h> -#include <linux/time.h> -#include <linux/smp.h> -#include <linux/sem.h> -#include <linux/shm.h> -#include <linux/slab.h> -#include <linux/uio.h> -#include <linux/ncp_fs.h> -#include <linux/poll.h> -#include <linux/personality.h> -#include <linux/stat.h> -#include <linux/highmem.h> -#include <linux/highuid.h> -#include <linux/mman.h> -#include <linux/binfmts.h> -#include <linux/namei.h> -#include <linux/vfs.h> -#include <linux/ptrace.h> -#include <linux/swap.h> #include <linux/syscalls.h> -#include <asm/types.h> -#include <asm/uaccess.h> -#include <asm/mmu_context.h> - -#undef DEBUG - -#ifdef DEBUG -#define DBG(x) printk x -#else -#define DBG(x) -#endif asmlinkage long sys32_unimplemented(int r26, int r25, int r24, int r23, int r22, int r21, int r20) @@ -57,3 +22,12 @@ asmlinkage long sys32_unimplemented(int r26, int r25, int r24, int r23, current->comm, current->pid, r20); return -ENOSYS; } + +asmlinkage long sys32_fanotify_mark(compat_int_t fanotify_fd, compat_uint_t flags, + compat_uint_t mask0, compat_uint_t mask1, compat_int_t dfd, + const char __user * pathname) +{ + return sys_fanotify_mark(fanotify_fd, flags, + ((__u64)mask1 << 32) | mask0, + dfd, pathname); +} diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index c5fa7a6..84c5d3a 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -418,7 +418,7 @@ ENTRY_SAME(accept4) /* 320 */ ENTRY_SAME(prlimit64) ENTRY_SAME(fanotify_init) - ENTRY_COMP(fanotify_mark) + ENTRY_DIFF(fanotify_mark) ENTRY_COMP(clock_adjtime) ENTRY_SAME(name_to_handle_at) /* 325 */ ENTRY_COMP(open_by_handle_at) diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index ae085ad..0bef864 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -728,7 +728,6 @@ static void __init pagetable_init(void) #endif empty_zero_page = alloc_bootmem_pages(PAGE_SIZE); - memset(empty_zero_page, 0, PAGE_SIZE); } static void __init gateway_init(void) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index bd6dd6e..80b94b0 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -145,6 +145,7 @@ config PPC select HAVE_IRQ_EXIT_ON_IRQ_STACK select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select HAVE_ARCH_AUDITSYSCALL + select ARCH_SUPPORTS_ATOMIC_RMW config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN @@ -414,7 +415,7 @@ config KEXEC config CRASH_DUMP bool "Build a kdump crash kernel" depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP) - select RELOCATABLE if PPC64 || 44x || FSL_BOOKE + select RELOCATABLE if (PPC64 && !COMPILE_TEST) || 44x || 
FSL_BOOKE help Build a kernel suitable for use as a kdump capture kernel. The same kernel binary can be used as production kernel and dump @@ -1017,6 +1018,7 @@ endmenu if PPC64 config RELOCATABLE bool "Build a relocatable kernel" + depends on !COMPILE_TEST select NONSTATIC_KERNEL help This builds a kernel image that is capable of running anywhere diff --git a/arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi index f75b4f820..7d4a6a2 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi @@ -32,7 +32,8 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - compatible = "fsl,sec-v6.0"; + compatible = "fsl,sec-v6.0", "fsl,sec-v5.0", + "fsl,sec-v4.0"; fsl,sec-era = <6>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index bc23477..0fdd7ee 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -447,6 +447,7 @@ extern const char *powerpc_base_platform; CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) +#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fddb72b..d645428 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -198,8 +198,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, return rb; } -static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l, + bool is_base_size) { + int size, a_psize; /* Look at the 8 bit LP value */ unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1); @@ -214,14 +216,27 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) continue; a_psize = __hpte_actual_psize(lp, size); - if (a_psize != -1) + if (a_psize != -1) { + if (is_base_size) + return 1ul << mmu_psize_defs[size].shift; return 1ul << mmu_psize_defs[a_psize].shift; + } } } return 0; } +static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 0); +} + +static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 1); +} + static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize) { return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 807014d..c2b4dcf 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -22,6 +22,7 @@ */ #include <asm/pgtable-ppc64.h> #include <asm/bug.h> +#include <asm/processor.h> /* * Segment table @@ -496,7 +497,7 @@ extern void slb_set_size(u16 size); */ struct subpage_prot_table { unsigned long maxaddr; /* only addresses < this are protected */ - unsigned int **protptrs[2]; + unsigned int **protptrs[(TASK_SIZE_USER64 >> 43)]; unsigned int *low_prot[4]; }; diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index f8d1d6d..e61f24e 100644 --- a/arch/powerpc/include/asm/mmu.h +++ 
b/arch/powerpc/include/asm/mmu.h @@ -19,8 +19,7 @@ #define MMU_FTR_TYPE_40x ASM_CONST(0x00000004) #define MMU_FTR_TYPE_44x ASM_CONST(0x00000008) #define MMU_FTR_TYPE_FSL_E ASM_CONST(0x00000010) -#define MMU_FTR_TYPE_3E ASM_CONST(0x00000020) -#define MMU_FTR_TYPE_47x ASM_CONST(0x00000040) +#define MMU_FTR_TYPE_47x ASM_CONST(0x00000020) /* * This is individual features @@ -106,13 +105,6 @@ MMU_FTR_CI_LARGE_PAGE #define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B -#define MMU_FTRS_A2 MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | \ - MMU_FTR_USE_TLBIVAX_BCAST | \ - MMU_FTR_LOCK_BCAST_INVAL | \ - MMU_FTR_USE_TLBRSRV | \ - MMU_FTR_USE_PAIRED_MAS | \ - MMU_FTR_TLBIEL | \ - MMU_FTR_16M_PAGE #ifndef __ASSEMBLY__ #include <asm/cputable.h> diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 9ed73714..b3e9360 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -61,8 +61,7 @@ struct power_pmu { #define PPMU_SIAR_VALID 0x00000010 /* Processor has SIAR Valid bit */ #define PPMU_HAS_SSLOT 0x00000020 /* Has sampled slot in MMCRA */ #define PPMU_HAS_SIER 0x00000040 /* Has SIER */ -#define PPMU_BHRB 0x00000080 /* has BHRB feature enabled */ -#define PPMU_EBB 0x00000100 /* supports event based branch */ +#define PPMU_ARCH_207S 0x00000080 /* PMC is architecture v2.07S */ /* * Values for flags to get_alternatives() diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 9ea266e..7e46125 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -277,6 +277,8 @@ n: .globl n; \ n: +#define _GLOBAL_TOC(name) _GLOBAL(name) + #define _KPROBE(n) \ .section ".kprobes.text","a"; \ .globl n; \ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 965291b..0c15764 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -527,6 +527,26 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* Power8 DD1: Does not support doorbell IPIs */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x004d0100, + .cpu_name = "POWER8 (raw)", + .cpu_features = CPU_FTRS_POWER8_DD1, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power8", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, { /* Power8 */ .pvr_mask = 0xffff0000, .pvr_value = 0x004d0000, diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 2480256..5cf3d36 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -131,7 +131,7 @@ _GLOBAL(power7_nap) _GLOBAL(power7_sleep) li r3,1 - li r4,0 + li r4,1 b power7_powersave_common /* No return */ diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index b49c72f..b2814e2 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -123,21 +123,12 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus, void pcibios_reset_secondary_bus(struct pci_dev *dev) { - u16 ctrl; - if 
(ppc_md.pcibios_reset_secondary_bus) { ppc_md.pcibios_reset_secondary_bus(dev); return; } - pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); - ctrl |= PCI_BRIDGE_CTL_BUS_RESET; - pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); - msleep(2); - - ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; - pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); - ssleep(1); + pci_reset_secondary_bus(dev); } static resource_size_t pcibios_io_size(const struct pci_controller *hose) diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 658e89d..db2b482 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -611,17 +611,19 @@ static void rtas_flash_firmware(int reboot_type) for (f = flist; f; f = next) { /* Translate data addrs to absolute */ for (i = 0; i < f->num_blocks; i++) { - f->blocks[i].data = (char *)__pa(f->blocks[i].data); + f->blocks[i].data = (char *)cpu_to_be64(__pa(f->blocks[i].data)); image_size += f->blocks[i].length; + f->blocks[i].length = cpu_to_be64(f->blocks[i].length); } next = f->next; /* Don't translate NULL pointer for last entry */ if (f->next) - f->next = (struct flash_block_list *)__pa(f->next); + f->next = (struct flash_block_list *)cpu_to_be64(__pa(f->next)); else f->next = NULL; /* make num_blocks into the version/length field */ f->num_blocks = (FLASH_BLOCK_LIST_VERSION << 56) | ((f->num_blocks+1)*16); + f->num_blocks = cpu_to_be64(f->num_blocks); } printk(KERN_ALERT "FLASH: flash image is %ld bytes\n", image_size); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 51a3ff7..1007fb8 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -747,7 +747,7 @@ int setup_profiling_timer(unsigned int multiplier) #ifdef CONFIG_SCHED_SMT /* cpumask of CPUs with asymetric SMT dependancy */ -static const int powerpc_smt_flags(void) +static int powerpc_smt_flags(void) { int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 8056107..68468d6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1562,7 +1562,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, goto out; } if (!rma_setup && is_vrma_hpte(v)) { - unsigned long psize = hpte_page_size(v, r); + unsigned long psize = hpte_base_page_size(v, r); unsigned long senc = slb_pgsize_encoding(psize); unsigned long lpcr; diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 8c86422..731be74 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -127,11 +127,6 @@ BEGIN_FTR_SECTION stw r10, HSTATE_PMC + 24(r13) stw r11, HSTATE_PMC + 28(r13) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -BEGIN_FTR_SECTION - mfspr r9, SPRN_SIER - std r8, HSTATE_MMCR + 40(r13) - std r9, HSTATE_MMCR + 48(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 31: /* diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6e62243..5a24d3c 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -814,13 +814,10 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, r = hpte[i+1]; /* - * Check the HPTE again, including large page size - * Since we don't currently allow any MPSS (mixed - * page-size segment) page sizes, it is sufficient - * to check against the actual page size. 
+ * Check the HPTE again, including base page size */ if ((v & valid) && (v & mask) == val && - hpte_page_size(v, r) == (1ul << pshift)) + hpte_base_page_size(v, r) == (1ul << pshift)) /* Return with the HPTE still locked */ return (hash << 3) + (i >> 1); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 868347e..558a67d 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -48,7 +48,7 @@ * * LR = return address to continue at after eventually re-enabling MMU */ -_GLOBAL(kvmppc_hv_entry_trampoline) +_GLOBAL_TOC(kvmppc_hv_entry_trampoline) mflr r0 std r0, PPC_LR_STKOFF(r1) stdu r1, -112(r1) diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index e2c29e3..d044b8b 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -25,7 +25,11 @@ #include <asm/exception-64s.h> #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU #elif defined(CONFIG_PPC_BOOK3S_32) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 9eec675..16c4d88 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -36,7 +36,11 @@ #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #elif defined(CONFIG_PPC_BOOK3S_32) @@ -146,7 +150,7 @@ kvmppc_handler_skip_ins: * On entry, r4 contains the guest shadow MSR * MSR.EE has to be 0 when calling this function */ -_GLOBAL(kvmppc_entry_trampoline) +_GLOBAL_TOC(kvmppc_entry_trampoline) mfmsr r5 LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) toreal(r7) diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index edb14ba..ef27fbd 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -23,20 +23,20 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq, server, priority; int rc; - if (args->nargs != 3 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 3 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; - server = args->args[1]; - priority = args->args[2]; + irq = be32_to_cpu(args->args[0]); + server = be32_to_cpu(args->args[1]); + priority = be32_to_cpu(args->args[2]); rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -44,12 +44,12 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq, server, priority; int rc; - if (args->nargs != 1 || args->nret != 3) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 3) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); server = priority = 0; rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); @@ -58,10 +58,10 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) goto out; } - args->rets[1] = server; - args->rets[2] = priority; + args->rets[1] = cpu_to_be32(server); + args->rets[2] = cpu_to_be32(priority); out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -69,18 
+69,18 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq; int rc; - if (args->nargs != 1 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); rc = kvmppc_xics_int_off(vcpu->kvm, irq); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -88,18 +88,18 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq; int rc; - if (args->nargs != 1 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); rc = kvmppc_xics_int_on(vcpu->kvm, irq); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } #endif /* CONFIG_KVM_XICS */ @@ -205,32 +205,6 @@ int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp) return rc; } -static void kvmppc_rtas_swap_endian_in(struct rtas_args *args) -{ -#ifdef __LITTLE_ENDIAN__ - int i; - - args->token = be32_to_cpu(args->token); - args->nargs = be32_to_cpu(args->nargs); - args->nret = be32_to_cpu(args->nret); - for (i = 0; i < args->nargs; i++) - args->args[i] = be32_to_cpu(args->args[i]); -#endif -} - -static void kvmppc_rtas_swap_endian_out(struct rtas_args *args) -{ -#ifdef __LITTLE_ENDIAN__ - int i; - - for (i = 0; i < args->nret; i++) - args->args[i] = cpu_to_be32(args->args[i]); - args->token = cpu_to_be32(args->token); - args->nargs = cpu_to_be32(args->nargs); - args->nret = cpu_to_be32(args->nret); -#endif -} - int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) { struct rtas_token_definition *d; @@ -249,8 +223,6 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) if (rc) goto fail; - kvmppc_rtas_swap_endian_in(&args); - /* * args->rets is a pointer into args->args. Now that we've * copied args we need to fix it up to point into our copy, @@ -258,13 +230,13 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) * value so we can restore it on the way out. 
*/ orig_rets = args.rets; - args.rets = &args.args[args.nargs]; + args.rets = &args.args[be32_to_cpu(args.nargs)]; mutex_lock(&vcpu->kvm->lock); rc = -ENOENT; list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) { - if (d->token == args.token) { + if (d->token == be32_to_cpu(args.token)) { d->handler->handler(vcpu, &args); rc = 0; break; @@ -275,7 +247,6 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) if (rc == 0) { args.rets = orig_rets; - kvmppc_rtas_swap_endian_out(&args); rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args)); if (rc) goto fail; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index dd2cc03..86903d3 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -473,7 +473,8 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (printk_ratelimit()) pr_err("%s: pte not present: gfn %lx, pfn %lx\n", __func__, (long)gfn, pfn); - return -EINVAL; + ret = -EINVAL; + goto out; } kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg); diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index 0738f96..43435c6 100644 --- a/arch/powerpc/lib/mem_64.S +++ b/arch/powerpc/lib/mem_64.S @@ -77,7 +77,7 @@ _GLOBAL(memset) stb r4,0(r6) blr -_GLOBAL(memmove) +_GLOBAL_TOC(memmove) cmplw 0,r3,r4 bgt backwards_memcpy b memcpy diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 412dd46..5c09f36 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1198,7 +1198,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = regs->gpr[rb] & 0x3f; ival = (signed int) regs->gpr[rd]; regs->gpr[ra] = ival >> (sh < 32 ? sh : 31); - if (ival < 0 && (sh >= 32 || (ival & ((1 << sh) - 1)) != 0)) + if (ival < 0 && (sh >= 32 || (ival & ((1ul << sh) - 1)) != 0)) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1208,7 +1208,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = rb; ival = (signed int) regs->gpr[rd]; regs->gpr[ra] = ival >> sh; - if (ival < 0 && (ival & ((1 << sh) - 1)) != 0) + if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1216,7 +1216,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef __powerpc64__ case 27: /* sld */ - sh = regs->gpr[rd] & 0x7f; + sh = regs->gpr[rb] & 0x7f; if (sh < 64) regs->gpr[ra] = regs->gpr[rd] << sh; else @@ -1235,7 +1235,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = regs->gpr[rb] & 0x7f; ival = (signed long int) regs->gpr[rd]; regs->gpr[ra] = ival >> (sh < 64 ? 
sh : 63); - if (ival < 0 && (sh >= 64 || (ival & ((1 << sh) - 1)) != 0)) + if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0)) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1246,7 +1246,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = rb | ((instr & 2) << 4); ival = (signed long int) regs->gpr[rd]; regs->gpr[ra] = ival >> sh; - if (ival < 0 && (ival & ((1 << sh) - 1)) != 0) + if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index af3d78e..928ebe7 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -410,17 +410,7 @@ void __init mmu_context_init(void) } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) { first_context = 1; last_context = 65535; - } else -#ifdef CONFIG_PPC_BOOK3E_MMU - if (mmu_has_feature(MMU_FTR_TYPE_3E)) { - u32 mmucfg = mfspr(SPRN_MMUCFG); - u32 pid_bits = (mmucfg & MMUCFG_PIDSIZE_MASK) - >> MMUCFG_PIDSIZE_SHIFT; - first_context = 1; - last_context = (1UL << (pid_bits + 1)) - 1; - } else -#endif - { + } else { first_context = 1; last_context = 255; } diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 6dcdade..82e82ca 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -390,12 +390,16 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, case BPF_ANC | SKF_AD_VLAN_TAG: case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, vlan_tci)); - if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) - PPC_ANDI(r_A, r_A, VLAN_VID_MASK); - else + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { + PPC_ANDI(r_A, r_A, ~VLAN_TAG_PRESENT); + } else { PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT); + PPC_SRWI(r_A, r_A, 12); + } break; case BPF_ANC | SKF_AD_QUEUE: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 4520c93..fe52db2 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -485,7 +485,7 @@ static bool is_ebb_event(struct perf_event *event) * check that the PMU supports EBB, meaning those that don't can still * use bit 63 of the event code for something else if they wish. */ - return (ppmu->flags & PPMU_EBB) && + return (ppmu->flags & PPMU_ARCH_207S) && ((event->attr.config >> PERF_EVENT_CONFIG_EBB_SHIFT) & 1); } @@ -777,7 +777,7 @@ void perf_event_print_debug(void) if (ppmu->flags & PPMU_HAS_SIER) sier = mfspr(SPRN_SIER); - if (ppmu->flags & PPMU_EBB) { + if (ppmu->flags & PPMU_ARCH_207S) { pr_info("MMCR2: %016lx EBBHR: %016lx\n", mfspr(SPRN_MMCR2), mfspr(SPRN_EBBHR)); pr_info("EBBRR: %016lx BESCR: %016lx\n", @@ -996,7 +996,22 @@ static void power_pmu_read(struct perf_event *event) } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); local64_add(delta, &event->count); - local64_sub(delta, &event->hw.period_left); + + /* + * A number of places program the PMC with (0x80000000 - period_left). + * We never want period_left to be less than 1 because we will program + * the PMC with a value >= 0x800000000 and an edge detected PMC will + * roll around to 0 before taking an exception. We have seen this + * on POWER8. + * + * To fix this, clamp the minimum value of period_left to 1. 
+ */ + do { + prev = local64_read(&event->hw.period_left); + val = prev - delta; + if (val < 1) + val = 1; + } while (local64_cmpxchg(&event->hw.period_left, prev, val) != prev); } /* @@ -1292,6 +1307,9 @@ static void power_pmu_enable(struct pmu *pmu) out_enable: pmao_restore_workaround(ebb); + if (ppmu->flags & PPMU_ARCH_207S) + mtspr(SPRN_MMCR2, 0); + mmcr0 = ebb_switch_in(ebb, cpuhw->mmcr[0]); mb(); @@ -1696,7 +1714,7 @@ static int power_pmu_event_init(struct perf_event *event) if (has_branch_stack(event)) { /* PMU has BHRB enabled */ - if (!(ppmu->flags & PPMU_BHRB)) + if (!(ppmu->flags & PPMU_ARCH_207S)) return -EOPNOTSUPP; } diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index fe2763b..639cd91 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -792,7 +792,7 @@ static struct power_pmu power8_pmu = { .get_constraint = power8_get_constraint, .get_alternatives = power8_get_alternatives, .disable_pmc = power8_disable_pmc, - .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB | PPMU_EBB, + .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_ARCH_207S, .n_generic = ARRAY_SIZE(power8_generic_events), .generic_events = power8_generic_events, .cache_events = &power8_cache_events, diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c index 38e0a1a..5e6e0ba 100644 --- a/arch/powerpc/platforms/cell/spu_syscalls.c +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -111,6 +111,7 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) return ret; } +#ifdef CONFIG_COREDUMP int elf_coredump_extra_notes_size(void) { struct spufs_calls *calls; @@ -142,6 +143,7 @@ int elf_coredump_extra_notes_write(struct coredump_params *cprm) return ret; } +#endif void notify_spus_active(void) { diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile index b9d5d67..52a7d25 100644 --- a/arch/powerpc/platforms/cell/spufs/Makefile +++ b/arch/powerpc/platforms/cell/spufs/Makefile @@ -1,8 +1,9 @@ obj-$(CONFIG_SPU_FS) += spufs.o -spufs-y += inode.o file.o context.o syscalls.o coredump.o +spufs-y += inode.o file.o context.o syscalls.o spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o spufs-y += switch.o fault.o lscsa_alloc.o +spufs-$(CONFIG_COREDUMP) += coredump.o # magic for the trace events CFLAGS_sched.o := -I$(src) diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index b045fdd..a87200a 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -79,8 +79,10 @@ static long do_spu_create(const char __user *pathname, unsigned int flags, struct spufs_calls spufs_calls = { .create_thread = do_spu_create, .spu_run = do_spu_run, - .coredump_extra_notes_size = spufs_coredump_extra_notes_size, - .coredump_extra_notes_write = spufs_coredump_extra_notes_write, .notify_spus_active = do_notify_spus_active, .owner = THIS_MODULE, +#ifdef CONFIG_COREDUMP + .coredump_extra_notes_size = spufs_coredump_extra_notes_size, + .coredump_extra_notes_write = spufs_coredump_extra_notes_write, +#endif }; diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 10268c4..0ad533b 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -249,7 +249,7 @@ static void elog_work_fn(struct work_struct *work) rc = opal_get_elog_size(&id, &size, &type); if (rc != OPAL_SUCCESS) 
{ - pr_err("ELOG: Opal log read failed\n"); + pr_err("ELOG: OPAL log info read failed\n"); return; } @@ -257,7 +257,7 @@ static void elog_work_fn(struct work_struct *work) log_id = be64_to_cpu(id); elog_type = be64_to_cpu(type); - BUG_ON(elog_size > OPAL_MAX_ERRLOG_SIZE); + WARN_ON(elog_size > OPAL_MAX_ERRLOG_SIZE); if (elog_size >= OPAL_MAX_ERRLOG_SIZE) elog_size = OPAL_MAX_ERRLOG_SIZE; diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 022b38e6..2d0b4d6 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -86,6 +86,7 @@ static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa, } of_node_set_flag(dn, OF_DYNAMIC); + of_node_init(dn); return dn; } diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 0435bb6..1c0a60d 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -69,6 +69,7 @@ static int pSeries_reconfig_add_node(const char *path, struct property *proplist np->properties = proplist; of_node_set_flag(np, OF_DYNAMIC); + of_node_init(np); np->parent = derive_parent(path); if (IS_ERR(np->parent)) { diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index df38c70..18ea9e3 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -51,8 +51,8 @@ static inline int restore_fp_ctl(u32 *fpc) return 0; asm volatile( - "0: lfpc %1\n" - " la %0,0\n" + " lfpc %1\n" + "0: la %0,0\n" "1:\n" EX_TABLE(0b,1b) : "=d" (rc) : "Q" (*fpc), "0" (-EINVAL)); diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 6a9a9eb..7366373 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -36,6 +36,7 @@ header-y += signal.h header-y += socket.h header-y += sockios.h header-y += sclp_ctl.h +header-y += sie.h header-y += stat.h header-y += statfs.h header-y += swab.h diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h index 3d97f61..5d9cc19 100644 --- a/arch/s390/include/uapi/asm/sie.h +++ b/arch/s390/include/uapi/asm/sie.h @@ -1,8 +1,6 @@ #ifndef _UAPI_ASM_S390_SIE_H #define _UAPI_ASM_S390_SIE_H -#include <asm/sigp.h> - #define diagnose_codes \ { 0x10, "DIAG (0x10) release pages" }, \ { 0x44, "DIAG (0x44) time slice end" }, \ @@ -13,18 +11,18 @@ { 0x500, "DIAG (0x500) KVM virtio functions" }, \ { 0x501, "DIAG (0x501) KVM breakpoint" } -#define sigp_order_codes \ - { SIGP_SENSE, "SIGP sense" }, \ - { SIGP_EXTERNAL_CALL, "SIGP external call" }, \ - { SIGP_EMERGENCY_SIGNAL, "SIGP emergency signal" }, \ - { SIGP_STOP, "SIGP stop" }, \ - { SIGP_STOP_AND_STORE_STATUS, "SIGP stop and store status" }, \ - { SIGP_SET_ARCHITECTURE, "SIGP set architecture" }, \ - { SIGP_SET_PREFIX, "SIGP set prefix" }, \ - { SIGP_SENSE_RUNNING, "SIGP sense running" }, \ - { SIGP_RESTART, "SIGP restart" }, \ - { SIGP_INITIAL_CPU_RESET, "SIGP initial cpu reset" }, \ - { SIGP_STORE_STATUS_AT_ADDRESS, "SIGP store status at address" } +#define sigp_order_codes \ + { 0x01, "SIGP sense" }, \ + { 0x02, "SIGP external call" }, \ + { 0x03, "SIGP emergency signal" }, \ + { 0x05, "SIGP stop" }, \ + { 0x06, "SIGP restart" }, \ + { 0x09, "SIGP stop and store status" }, \ + { 0x0b, "SIGP initial cpu reset" }, \ + { 0x0d, "SIGP set prefix" }, \ + { 0x0e, "SIGP store status at address" }, \ + { 0x12, "SIGP set architecture" }, \ + { 0x15, "SIGP sense running" } #define icpt_prog_codes \ { 
0x0001, "Prog Operation" }, \ diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 7ba7d67..e88d35d 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -437,11 +437,11 @@ ENTRY(startup_kdump) #if defined(CONFIG_64BIT) #if defined(CONFIG_MARCH_ZEC12) - .long 3, 0xc100efea, 0xf46ce800, 0x00400000 + .long 3, 0xc100eff2, 0xf46ce800, 0x00400000 #elif defined(CONFIG_MARCH_Z196) - .long 2, 0xc100efea, 0xf46c0000 + .long 2, 0xc100eff2, 0xf46c0000 #elif defined(CONFIG_MARCH_Z10) - .long 2, 0xc100efea, 0xf0680000 + .long 2, 0xc100eff2, 0xf0680000 #elif defined(CONFIG_MARCH_Z9_109) .long 1, 0xc100efc2 #elif defined(CONFIG_MARCH_Z990) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 2d716734..5dc7ad9 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -334,9 +334,14 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) unsigned long mask = PSW_MASK_USER; mask |= is_ri_task(child) ? PSW_MASK_RI : 0; - if ((data & ~mask) != PSW_USER_BITS) + if ((data ^ PSW_USER_BITS) & ~mask) + /* Invalid psw mask. */ + return -EINVAL; + if ((data & PSW_MASK_ASC) == PSW_ASC_HOME) + /* Invalid address-space-control bits */ return -EINVAL; if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)) + /* Invalid addressing mode bits */ return -EINVAL; } *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; @@ -672,9 +677,12 @@ static int __poke_user_compat(struct task_struct *child, mask |= is_ri_task(child) ? PSW32_MASK_RI : 0; /* Build a 64 bit psw mask from 31 bit mask. */ - if ((tmp & ~mask) != PSW32_USER_BITS) + if ((tmp ^ PSW32_USER_BITS) & ~mask) /* Invalid psw mask. */ return -EINVAL; + if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME) + /* Invalid address-space-control bits */ + return -EINVAL; regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | (regs->psw.mask & PSW_MASK_BA) | (__u64)(tmp & mask) << 32; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 9ddc51e..30de427 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -48,13 +48,10 @@ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); -static void zpci_enable_irq(struct irq_data *data); -static void zpci_disable_irq(struct irq_data *data); - static struct irq_chip zpci_irq_chip = { .name = "zPCI", - .irq_unmask = zpci_enable_irq, - .irq_mask = zpci_disable_irq, + .irq_unmask = unmask_msi_irq, + .irq_mask = mask_msi_irq, }; static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); @@ -244,43 +241,6 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) return rc; } -static int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag) -{ - int offset, pos; - u32 mask_bits; - - if (msi->msi_attrib.is_msix) { - offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL; - msi->masked = readl(msi->mask_base + offset); - writel(flag, msi->mask_base + offset); - } else if (msi->msi_attrib.maskbit) { - pos = (long) msi->mask_base; - pci_read_config_dword(msi->dev, pos, &mask_bits); - mask_bits &= ~(mask); - mask_bits |= flag & mask; - pci_write_config_dword(msi->dev, pos, mask_bits); - } else - return 0; - - msi->msi_attrib.maskbit = !!flag; - return 1; -} - -static void zpci_enable_irq(struct irq_data *data) -{ - struct msi_desc *msi = irq_get_msi_desc(data->irq); - - zpci_msi_set_mask_bits(msi, 1, 0); -} - -static void zpci_disable_irq(struct irq_data *data) -{ - struct msi_desc *msi = irq_get_msi_desc(data->irq); - - zpci_msi_set_mask_bits(msi, 1, 1); -} - void 
pcibios_fixup_bus(struct pci_bus *bus) { } @@ -487,7 +447,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) /* Release MSI interrupts */ list_for_each_entry(msi, &pdev->msi_list, list) { - zpci_msi_set_mask_bits(msi, 1, 1); + if (msi->msi_attrib.is_msix) + default_msix_mask_irq(msi, 1); + else + default_msi_mask_irq(msi, 1, 1); irq_set_msi_desc(msi->irq, NULL); irq_free_desc(msi->irq); msi->msg.address_lo = 0; diff --git a/arch/sh/Makefile b/arch/sh/Makefile index d4d16e4..bf5b3f5 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile @@ -32,7 +32,8 @@ endif cflags-$(CONFIG_CPU_SH2) := $(call cc-option,-m2,) cflags-$(CONFIG_CPU_SH2A) += $(call cc-option,-m2a,) \ - $(call cc-option,-m2a-nofpu,) + $(call cc-option,-m2a-nofpu,) \ + $(call cc-option,-m4-nofpu,) cflags-$(CONFIG_CPU_SH3) := $(call cc-option,-m3,) cflags-$(CONFIG_CPU_SH4) := $(call cc-option,-m4,) \ $(call cc-option,-mno-implicit-fp,-m4-nofpu) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 29f2e98..407c87d 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -78,6 +78,7 @@ config SPARC64 select HAVE_C_RECORDMCOUNT select NO_BOOTMEM select HAVE_ARCH_AUDITSYSCALL + select ARCH_SUPPORTS_ATOMIC_RMW config ARCH_DEFCONFIG string diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index b73274f..42f2bca 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -410,8 +410,9 @@ #define __NR_finit_module 342 #define __NR_sched_setattr 343 #define __NR_sched_getattr 344 +#define __NR_renameat2 345 -#define NR_syscalls 345 +#define NR_syscalls 346 /* Bitmask values returned from kern_features system call. */ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index d066eb1..f834224 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -48,6 +48,7 @@ SIGN1(sys32_futex, compat_sys_futex, %o1) SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0) SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0) SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0) +SIGN2(sys32_renameat2, sys_renameat2, %o0, %o2) .globl sys32_mmap2 sys32_mmap2: diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 151ace8..85fe9b1 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -86,3 +86,4 @@ sys_call_table: /*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime /*335*/ .long sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev /*340*/ .long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr +/*345*/ .long sys_renameat2 diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 4bd4e2b..33ecba2 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -87,6 +87,7 @@ sys_call_table32: /*330*/ .word compat_sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime .word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr + .word sys32_renameat2 #endif /* CONFIG_COMPAT */ @@ -165,3 +166,4 @@ sys_call_table: /*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime .word sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, 
sys_process_vm_writev /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr + .word sys_renameat2 diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 9472079..f1b3eb1 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -12,6 +12,7 @@ #include <mem_user.h> #include <os.h> #include <skas.h> +#include <kern_util.h> struct host_vm_change { struct host_vm_op { @@ -124,6 +125,9 @@ static int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *last; int ret = 0; + if ((addr >= STUB_START) && (addr < STUB_END)) + return -EINVAL; + if (hvc->index != 0) { last = &hvc->ops[hvc->index - 1]; if ((last->type == MUNMAP) && @@ -283,8 +287,11 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, /* This is not an else because ret is modified above */ if (ret) { printk(KERN_ERR "fix_range_common: failed, killing current " - "process\n"); + "process: %d\n", task_tgid_vnr(current)); + /* We are under mmap_sem, release it such that current can terminate */ + up_write(&current->mm->mmap_sem); force_sig(SIGKILL, current); + do_signal(); } } diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 974b874..5678c35 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -206,7 +206,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, int is_write = FAULT_WRITE(fi); unsigned long address = FAULT_ADDRESS(fi); - if (regs) + if (!is_user && regs) current->thread.segv_regs = container_of(regs, struct pt_regs, regs); if (!is_user && (address >= start_vm) && (address < end_vm)) { diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index d531879..908579f 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -54,7 +54,7 @@ static int ptrace_dump_regs(int pid) void wait_stub_done(int pid) { - int n, status, err, bad_stop = 0; + int n, status, err; while (1) { CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); @@ -74,8 +74,6 @@ void wait_stub_done(int pid) if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0) return; - else - bad_stop = 1; bad_wait: err = ptrace_dump_regs(pid); @@ -85,10 +83,7 @@ bad_wait: printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, " "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno, status); - if (bad_stop) - kill(pid, SIGKILL); - else - fatal_sigsegv(); + fatal_sigsegv(); } extern unsigned long current_stub_stack(void); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a8f749e..d24887b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -131,6 +131,7 @@ config X86 select HAVE_CC_STACKPROTECTOR select GENERIC_CPU_AUTOPROBE select HAVE_ARCH_AUDITSYSCALL + select ARCH_SUPPORTS_ATOMIC_RMW config INSTRUCTION_DECODER def_bool y diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 84c2234..7a6d43a 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -91,10 +91,9 @@ bs_die: .section ".bsdata", "a" bugger_off_msg: - .ascii "Direct floppy boot is not supported. 
" - .ascii "Use a boot loader program instead.\r\n" + .ascii "Use a boot loader.\r\n" .ascii "\n" - .ascii "Remove disk and press any key to reboot ...\r\n" + .ascii "Remove disk and press any key to reboot...\r\n" .byte 0 #ifdef CONFIG_EFI_STUB @@ -108,7 +107,7 @@ coff_header: #else .word 0x8664 # x86-64 #endif - .word 3 # nr_sections + .word 4 # nr_sections .long 0 # TimeDateStamp .long 0 # PointerToSymbolTable .long 1 # NumberOfSymbols @@ -250,6 +249,25 @@ section_table: .word 0 # NumberOfLineNumbers .long 0x60500020 # Characteristics (section flags) + # + # The offset & size fields are filled in by build.c. + # + .ascii ".bss" + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .long 0 + .long 0x0 + .long 0 # Size of initialized data + # on disk + .long 0x0 + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long 0xc8000080 # Characteristics (section flags) + #endif /* CONFIG_EFI_STUB */ # Kernel attributes; used by setup. This is part 1 of the diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 1a2f212..a7661c4 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -143,7 +143,7 @@ static void usage(void) #ifdef CONFIG_EFI_STUB -static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset) { unsigned int pe_header; unsigned short num_sections; @@ -164,10 +164,10 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz put_unaligned_le32(size, section + 0x8); /* section header vma field */ - put_unaligned_le32(offset, section + 0xc); + put_unaligned_le32(vma, section + 0xc); /* section header 'size of initialised data' field */ - put_unaligned_le32(size, section + 0x10); + put_unaligned_le32(datasz, section + 0x10); /* section header 'file offset' field */ put_unaligned_le32(offset, section + 0x14); @@ -179,6 +179,11 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz } } +static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +{ + update_pecoff_section_header_fields(section_name, offset, size, size, offset); +} + static void update_pecoff_setup_and_reloc(unsigned int size) { u32 setup_offset = 0x200; @@ -203,9 +208,6 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) pe_header = get_unaligned_le32(&buf[0x3c]); - /* Size of image */ - put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); - /* * Size of code: Subtract the size of the first sector (512 bytes) * which includes the header. 
@@ -220,6 +222,22 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) update_pecoff_section_header(".text", text_start, text_sz); } +static void update_pecoff_bss(unsigned int file_sz, unsigned int init_sz) +{ + unsigned int pe_header; + unsigned int bss_sz = init_sz - file_sz; + + pe_header = get_unaligned_le32(&buf[0x3c]); + + /* Size of uninitialized data */ + put_unaligned_le32(bss_sz, &buf[pe_header + 0x24]); + + /* Size of image */ + put_unaligned_le32(init_sz, &buf[pe_header + 0x50]); + + update_pecoff_section_header_fields(".bss", file_sz, bss_sz, 0, 0); +} + static int reserve_pecoff_reloc_section(int c) { /* Reserve 0x20 bytes for .reloc section */ @@ -259,6 +277,8 @@ static void efi_stub_entry_update(void) static inline void update_pecoff_setup_and_reloc(unsigned int size) {} static inline void update_pecoff_text(unsigned int text_start, unsigned int file_sz) {} +static inline void update_pecoff_bss(unsigned int file_sz, + unsigned int init_sz) {} static inline void efi_stub_defaults(void) {} static inline void efi_stub_entry_update(void) {} @@ -310,7 +330,7 @@ static void parse_zoffset(char *fname) int main(int argc, char ** argv) { - unsigned int i, sz, setup_sectors; + unsigned int i, sz, setup_sectors, init_sz; int c; u32 sys_size; struct stat sb; @@ -376,7 +396,9 @@ int main(int argc, char ** argv) buf[0x1f1] = setup_sectors-1; put_unaligned_le32(sys_size, &buf[0x1f4]); - update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); + update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); + init_sz = get_unaligned_le32(&buf[0x260]); + update_pecoff_bss(i + (sys_size * 16), init_sz); efi_stub_entry_update(); diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 61d6e28..d551165 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o +obj-$(CONFIG_CRYPTO_DES3_EDE_X86_64) += des3_ede-x86_64.o obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o @@ -52,6 +53,7 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o +des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o @@ -76,7 +78,7 @@ ifeq ($(avx2_supported),yes) endif aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o -aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o +aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o ifeq ($(avx2_supported),yes) diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S new file mode 100644 index 0000000..f091f122 --- /dev/null +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -0,0 +1,546 @@ +/* + * Implement AES CTR mode by8 optimization with AVX instructions. (x86_64) + * + * This is AES128/192/256 CTR mode optimization implementation. It requires + * the support of Intel(R) AESNI and AVX instructions. 
+ * + * This work was inspired by the AES CTR mode optimization published + * in Intel Optimized IPSEC Cryptograhpic library. + * Additional information on it can be found at: + * http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972 + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford <james.guilford@intel.com> + * Sean Gulley <sean.m.gulley@intel.com> + * Chandramouli Narayanan <mouli@linux.intel.com> + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include <linux/linkage.h> +#include <asm/inst.h> + +#define CONCAT(a,b) a##b +#define VMOVDQ vmovdqu + +#define xdata0 %xmm0 +#define xdata1 %xmm1 +#define xdata2 %xmm2 +#define xdata3 %xmm3 +#define xdata4 %xmm4 +#define xdata5 %xmm5 +#define xdata6 %xmm6 +#define xdata7 %xmm7 +#define xcounter %xmm8 +#define xbyteswap %xmm9 +#define xkey0 %xmm10 +#define xkey3 %xmm11 +#define xkey6 %xmm12 +#define xkey9 %xmm13 +#define xkey4 %xmm11 +#define xkey8 %xmm12 +#define xkey12 %xmm13 +#define xkeyA %xmm14 +#define xkeyB %xmm15 + +#define p_in %rdi +#define p_iv %rsi +#define p_keys %rdx +#define p_out %rcx +#define num_bytes %r8 + +#define tmp %r10 +#define DDQ(i) CONCAT(ddq_add_,i) +#define XMM(i) CONCAT(%xmm, i) +#define DDQ_DATA 0 +#define XDATA 1 +#define KEY_128 1 +#define KEY_192 2 +#define KEY_256 3 + +.section .rodata +.align 16 + +byteswap_const: + .octa 0x000102030405060708090A0B0C0D0E0F +ddq_add_1: + .octa 0x00000000000000000000000000000001 +ddq_add_2: + .octa 0x00000000000000000000000000000002 +ddq_add_3: + .octa 0x00000000000000000000000000000003 +ddq_add_4: + .octa 0x00000000000000000000000000000004 +ddq_add_5: + .octa 0x00000000000000000000000000000005 +ddq_add_6: + .octa 0x00000000000000000000000000000006 +ddq_add_7: + .octa 0x00000000000000000000000000000007 +ddq_add_8: + .octa 0x00000000000000000000000000000008 + +.text + +/* generate a unique variable for ddq_add_x */ + +.macro setddq n + var_ddq_add = DDQ(\n) +.endm + +/* generate a unique variable for xmm register */ +.macro setxdata n + var_xdata = XMM(\n) +.endm + +/* club the numeric 'id' to the symbol 'name' */ + +.macro club name, id +.altmacro + .if \name == DDQ_DATA + setddq %\id + .elseif \name == XDATA + setxdata %\id + .endif +.noaltmacro +.endm + +/* + * do_aes num_in_par load_keys key_len + * This increments p_in, but not p_out + */ +.macro do_aes b, k, key_len + .set by, \b + .set load_keys, \k + .set klen, \key_len + + .if (load_keys) + vmovdqa 0*16(p_keys), xkey0 + .endif + + vpshufb xbyteswap, xcounter, xdata0 + + .set i, 1 + .rept (by - 1) + club DDQ_DATA, i + club XDATA, i + vpaddd var_ddq_add(%rip), xcounter, var_xdata + vpshufb xbyteswap, var_xdata, var_xdata + .set i, (i +1) + .endr + + vmovdqa 1*16(p_keys), xkeyA + + vpxor xkey0, xdata0, xdata0 + club DDQ_DATA, by + vpaddd var_ddq_add(%rip), xcounter, xcounter + + .set i, 1 + .rept (by - 1) + club XDATA, i + vpxor xkey0, var_xdata, var_xdata + .set i, (i +1) + .endr + + vmovdqa 2*16(p_keys), xkeyB + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 1 */ + .set i, (i +1) + .endr + + .if (klen == KEY_128) + .if (load_keys) + vmovdqa 3*16(p_keys), xkeyA + .endif + .else + vmovdqa 3*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyB, var_xdata, var_xdata /* key 2 */ + .set i, (i +1) + .endr + + add $(16*by), p_in + + .if (klen == KEY_128) + vmovdqa 4*16(p_keys), xkey4 + .else + .if (load_keys) + vmovdqa 4*16(p_keys), xkey4 + .endif + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 3 */ + .set i, (i +1) + .endr + + vmovdqa 5*16(p_keys), xkeyA + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkey4, var_xdata, var_xdata /* key 4 */ + .set i, (i +1) + .endr + + .if (klen == KEY_128) + .if (load_keys) + vmovdqa 6*16(p_keys), xkeyB + .endif + .else + vmovdqa 6*16(p_keys), xkeyB + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 5 */ + .set i, (i +1) + .endr + + vmovdqa 7*16(p_keys), xkeyA + + 
.set i, 0 + .rept by + club XDATA, i + vaesenc xkeyB, var_xdata, var_xdata /* key 6 */ + .set i, (i +1) + .endr + + .if (klen == KEY_128) + vmovdqa 8*16(p_keys), xkey8 + .else + .if (load_keys) + vmovdqa 8*16(p_keys), xkey8 + .endif + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 7 */ + .set i, (i +1) + .endr + + .if (klen == KEY_128) + .if (load_keys) + vmovdqa 9*16(p_keys), xkeyA + .endif + .else + vmovdqa 9*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkey8, var_xdata, var_xdata /* key 8 */ + .set i, (i +1) + .endr + + vmovdqa 10*16(p_keys), xkeyB + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 9 */ + .set i, (i +1) + .endr + + .if (klen != KEY_128) + vmovdqa 11*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + /* key 10 */ + .if (klen == KEY_128) + vaesenclast xkeyB, var_xdata, var_xdata + .else + vaesenc xkeyB, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + .if (klen != KEY_128) + .if (load_keys) + vmovdqa 12*16(p_keys), xkey12 + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 11 */ + .set i, (i +1) + .endr + + .if (klen == KEY_256) + vmovdqa 13*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + .if (klen == KEY_256) + /* key 12 */ + vaesenc xkey12, var_xdata, var_xdata + .else + vaesenclast xkey12, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + .if (klen == KEY_256) + vmovdqa 14*16(p_keys), xkeyB + + .set i, 0 + .rept by + club XDATA, i + /* key 13 */ + vaesenc xkeyA, var_xdata, var_xdata + .set i, (i +1) + .endr + + .set i, 0 + .rept by + club XDATA, i + /* key 14 */ + vaesenclast xkeyB, var_xdata, var_xdata + .set i, (i +1) + .endr + .endif + .endif + + .set i, 0 + .rept (by / 2) + .set j, (i+1) + VMOVDQ (i*16 - 16*by)(p_in), xkeyA + VMOVDQ (j*16 - 16*by)(p_in), xkeyB + club XDATA, i + vpxor xkeyA, var_xdata, var_xdata + club XDATA, j + vpxor xkeyB, var_xdata, var_xdata + .set i, (i+2) + .endr + + .if (i < by) + VMOVDQ (i*16 - 16*by)(p_in), xkeyA + club XDATA, i + vpxor xkeyA, var_xdata, var_xdata + .endif + + .set i, 0 + .rept by + club XDATA, i + VMOVDQ var_xdata, i*16(p_out) + .set i, (i+1) + .endr +.endm + +.macro do_aes_load val, key_len + do_aes \val, 1, \key_len +.endm + +.macro do_aes_noload val, key_len + do_aes \val, 0, \key_len +.endm + +/* main body of aes ctr load */ + +.macro do_aes_ctrmain key_len + + cmp $16, num_bytes + jb .Ldo_return2\key_len + + vmovdqa byteswap_const(%rip), xbyteswap + vmovdqu (p_iv), xcounter + vpshufb xbyteswap, xcounter, xcounter + + mov num_bytes, tmp + and $(7*16), tmp + jz .Lmult_of_8_blks\key_len + + /* 1 <= tmp <= 7 */ + cmp $(4*16), tmp + jg .Lgt4\key_len + je .Leq4\key_len + +.Llt4\key_len: + cmp $(2*16), tmp + jg .Leq3\key_len + je .Leq2\key_len + +.Leq1\key_len: + do_aes_load 1, \key_len + add $(1*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Leq2\key_len: + do_aes_load 2, \key_len + add $(2*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + + +.Leq3\key_len: + do_aes_load 3, \key_len + add $(3*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Leq4\key_len: + do_aes_load 4, \key_len + add $(4*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Lgt4\key_len: + cmp $(6*16), tmp + jg .Leq7\key_len + je .Leq6\key_len + +.Leq5\key_len: + 
do_aes_load 5, \key_len + add $(5*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Leq6\key_len: + do_aes_load 6, \key_len + add $(6*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Leq7\key_len: + do_aes_load 7, \key_len + add $(7*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Lmult_of_8_blks\key_len: + .if (\key_len != KEY_128) + vmovdqa 0*16(p_keys), xkey0 + vmovdqa 4*16(p_keys), xkey4 + vmovdqa 8*16(p_keys), xkey8 + vmovdqa 12*16(p_keys), xkey12 + .else + vmovdqa 0*16(p_keys), xkey0 + vmovdqa 3*16(p_keys), xkey4 + vmovdqa 6*16(p_keys), xkey8 + vmovdqa 9*16(p_keys), xkey12 + .endif +.align 16 +.Lmain_loop2\key_len: + /* num_bytes is a multiple of 8 and >0 */ + do_aes_noload 8, \key_len + add $(8*16), p_out + sub $(8*16), num_bytes + jne .Lmain_loop2\key_len + +.Ldo_return2\key_len: + /* return updated IV */ + vpshufb xbyteswap, xcounter, xcounter + vmovdqu xcounter, (p_iv) + ret +.endm + +/* + * routine to do AES128 CTR enc/decrypt "by8" + * XMM registers are clobbered. + * Saving/restoring must be done at a higher level + * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out, + * unsigned int num_bytes) + */ +ENTRY(aes_ctr_enc_128_avx_by8) + /* call the aes main loop */ + do_aes_ctrmain KEY_128 + +ENDPROC(aes_ctr_enc_128_avx_by8) + +/* + * routine to do AES192 CTR enc/decrypt "by8" + * XMM registers are clobbered. + * Saving/restoring must be done at a higher level + * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out, + * unsigned int num_bytes) + */ +ENTRY(aes_ctr_enc_192_avx_by8) + /* call the aes main loop */ + do_aes_ctrmain KEY_192 + +ENDPROC(aes_ctr_enc_192_avx_by8) + +/* + * routine to do AES256 CTR enc/decrypt "by8" + * XMM registers are clobbered. 
+ * Saving/restoring must be done at a higher level + * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out, + * unsigned int num_bytes) + */ +ENTRY(aes_ctr_enc_256_avx_by8) + /* call the aes main loop */ + do_aes_ctrmain KEY_256 + +ENDPROC(aes_ctr_enc_256_avx_by8) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 948ad0e..888950f 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -105,6 +105,9 @@ void crypto_fpu_exit(void); #define AVX_GEN4_OPTSIZE 4096 #ifdef CONFIG_X86_64 + +static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); @@ -155,6 +158,12 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, #ifdef CONFIG_AS_AVX +asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv, + void *keys, u8 *out, unsigned int num_bytes); +asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv, + void *keys, u8 *out, unsigned int num_bytes); +asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv, + void *keys, u8 *out, unsigned int num_bytes); /* * asmlinkage void aesni_gcm_precomp_avx_gen2() * gcm_data *my_ctx_data, context data @@ -472,6 +481,25 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx, crypto_inc(ctrblk, AES_BLOCK_SIZE); } +#ifdef CONFIG_AS_AVX +static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv) +{ + /* + * based on key length, override with the by8 version + * of ctr mode encryption/decryption for improved performance + * aes_set_key_common() ensures that key length is one of + * {128,192,256} + */ + if (ctx->key_length == AES_KEYSIZE_128) + aes_ctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len); + else if (ctx->key_length == AES_KEYSIZE_192) + aes_ctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len); + else + aes_ctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len); +} +#endif + static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -486,8 +514,8 @@ static int ctr_crypt(struct blkcipher_desc *desc, kernel_fpu_begin(); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { - aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & AES_BLOCK_MASK, walk.iv); + aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, walk.iv); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } @@ -1493,6 +1521,14 @@ static int __init aesni_init(void) aesni_gcm_enc_tfm = aesni_gcm_enc; aesni_gcm_dec_tfm = aesni_gcm_dec; } + aesni_ctr_enc_tfm = aesni_ctr_enc; +#ifdef CONFIG_AS_AVX + if (cpu_has_avx) { + /* optimize performance of ctr mode encryption transform */ + aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm; + pr_info("AES CTR mode by8 optimization enabled\n"); + } +#endif #endif err = crypto_fpu_init(); diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index dbc4339..26d49eb 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -72,6 +72,7 @@ # unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); +.text ENTRY(crc_pcl) #define bufp %rdi #define bufp_dw %edi @@ -216,15 +217,11 @@ LABEL crc_ %i ## 4) Combine three results: ################################################################ - lea (K_table-16)(%rip), bufp # first 
entry is for idx 1 + lea (K_table-8)(%rip), bufp # first entry is for idx 1 shlq $3, %rax # rax *= 8 - subq %rax, tmp # tmp -= rax*8 - shlq $1, %rax - subq %rax, tmp # tmp -= rax*16 - # (total tmp -= rax*24) - addq %rax, bufp - - movdqa (bufp), %xmm0 # 2 consts: K1:K2 + pmovzxdq (bufp,%rax), %xmm0 # 2 consts: K1:K2 + leal (%eax,%eax,2), %eax # rax *= 3 (total *24) + subq %rax, tmp # tmp -= rax*24 movq crc_init, %xmm1 # CRC for block 1 PCLMULQDQ 0x00,%xmm0,%xmm1 # Multiply by K2 @@ -238,9 +235,9 @@ LABEL crc_ %i mov crc2, crc_init crc32 %rax, crc_init -################################################################ -## 5) Check for end: -################################################################ + ################################################################ + ## 5) Check for end: + ################################################################ LABEL crc_ 0 mov tmp, len @@ -331,136 +328,136 @@ ENDPROC(crc_pcl) ################################################################ ## PCLMULQDQ tables - ## Table is 128 entries x 2 quad words each + ## Table is 128 entries x 2 words (8 bytes) each ################################################################ -.data -.align 64 +.section .rotata, "a", %progbits +.align 8 K_table: - .quad 0x14cd00bd6,0x105ec76f0 - .quad 0x0ba4fc28e,0x14cd00bd6 - .quad 0x1d82c63da,0x0f20c0dfe - .quad 0x09e4addf8,0x0ba4fc28e - .quad 0x039d3b296,0x1384aa63a - .quad 0x102f9b8a2,0x1d82c63da - .quad 0x14237f5e6,0x01c291d04 - .quad 0x00d3b6092,0x09e4addf8 - .quad 0x0c96cfdc0,0x0740eef02 - .quad 0x18266e456,0x039d3b296 - .quad 0x0daece73e,0x0083a6eec - .quad 0x0ab7aff2a,0x102f9b8a2 - .quad 0x1248ea574,0x1c1733996 - .quad 0x083348832,0x14237f5e6 - .quad 0x12c743124,0x02ad91c30 - .quad 0x0b9e02b86,0x00d3b6092 - .quad 0x018b33a4e,0x06992cea2 - .quad 0x1b331e26a,0x0c96cfdc0 - .quad 0x17d35ba46,0x07e908048 - .quad 0x1bf2e8b8a,0x18266e456 - .quad 0x1a3e0968a,0x11ed1f9d8 - .quad 0x0ce7f39f4,0x0daece73e - .quad 0x061d82e56,0x0f1d0f55e - .quad 0x0d270f1a2,0x0ab7aff2a - .quad 0x1c3f5f66c,0x0a87ab8a8 - .quad 0x12ed0daac,0x1248ea574 - .quad 0x065863b64,0x08462d800 - .quad 0x11eef4f8e,0x083348832 - .quad 0x1ee54f54c,0x071d111a8 - .quad 0x0b3e32c28,0x12c743124 - .quad 0x0064f7f26,0x0ffd852c6 - .quad 0x0dd7e3b0c,0x0b9e02b86 - .quad 0x0f285651c,0x0dcb17aa4 - .quad 0x010746f3c,0x018b33a4e - .quad 0x1c24afea4,0x0f37c5aee - .quad 0x0271d9844,0x1b331e26a - .quad 0x08e766a0c,0x06051d5a2 - .quad 0x093a5f730,0x17d35ba46 - .quad 0x06cb08e5c,0x11d5ca20e - .quad 0x06b749fb2,0x1bf2e8b8a - .quad 0x1167f94f2,0x021f3d99c - .quad 0x0cec3662e,0x1a3e0968a - .quad 0x19329634a,0x08f158014 - .quad 0x0e6fc4e6a,0x0ce7f39f4 - .quad 0x08227bb8a,0x1a5e82106 - .quad 0x0b0cd4768,0x061d82e56 - .quad 0x13c2b89c4,0x188815ab2 - .quad 0x0d7a4825c,0x0d270f1a2 - .quad 0x10f5ff2ba,0x105405f3e - .quad 0x00167d312,0x1c3f5f66c - .quad 0x0f6076544,0x0e9adf796 - .quad 0x026f6a60a,0x12ed0daac - .quad 0x1a2adb74e,0x096638b34 - .quad 0x19d34af3a,0x065863b64 - .quad 0x049c3cc9c,0x1e50585a0 - .quad 0x068bce87a,0x11eef4f8e - .quad 0x1524fa6c6,0x19f1c69dc - .quad 0x16cba8aca,0x1ee54f54c - .quad 0x042d98888,0x12913343e - .quad 0x1329d9f7e,0x0b3e32c28 - .quad 0x1b1c69528,0x088f25a3a - .quad 0x02178513a,0x0064f7f26 - .quad 0x0e0ac139e,0x04e36f0b0 - .quad 0x0170076fa,0x0dd7e3b0c - .quad 0x141a1a2e2,0x0bd6f81f8 - .quad 0x16ad828b4,0x0f285651c - .quad 0x041d17b64,0x19425cbba - .quad 0x1fae1cc66,0x010746f3c - .quad 0x1a75b4b00,0x18db37e8a - .quad 0x0f872e54c,0x1c24afea4 - .quad 0x01e41e9fc,0x04c144932 - .quad 
0x086d8e4d2,0x0271d9844 - .quad 0x160f7af7a,0x052148f02 - .quad 0x05bb8f1bc,0x08e766a0c - .quad 0x0a90fd27a,0x0a3c6f37a - .quad 0x0b3af077a,0x093a5f730 - .quad 0x04984d782,0x1d22c238e - .quad 0x0ca6ef3ac,0x06cb08e5c - .quad 0x0234e0b26,0x063ded06a - .quad 0x1d88abd4a,0x06b749fb2 - .quad 0x04597456a,0x04d56973c - .quad 0x0e9e28eb4,0x1167f94f2 - .quad 0x07b3ff57a,0x19385bf2e - .quad 0x0c9c8b782,0x0cec3662e - .quad 0x13a9cba9e,0x0e417f38a - .quad 0x093e106a4,0x19329634a - .quad 0x167001a9c,0x14e727980 - .quad 0x1ddffc5d4,0x0e6fc4e6a - .quad 0x00df04680,0x0d104b8fc - .quad 0x02342001e,0x08227bb8a - .quad 0x00a2a8d7e,0x05b397730 - .quad 0x168763fa6,0x0b0cd4768 - .quad 0x1ed5a407a,0x0e78eb416 - .quad 0x0d2c3ed1a,0x13c2b89c4 - .quad 0x0995a5724,0x1641378f0 - .quad 0x19b1afbc4,0x0d7a4825c - .quad 0x109ffedc0,0x08d96551c - .quad 0x0f2271e60,0x10f5ff2ba - .quad 0x00b0bf8ca,0x00bf80dd2 - .quad 0x123888b7a,0x00167d312 - .quad 0x1e888f7dc,0x18dcddd1c - .quad 0x002ee03b2,0x0f6076544 - .quad 0x183e8d8fe,0x06a45d2b2 - .quad 0x133d7a042,0x026f6a60a - .quad 0x116b0f50c,0x1dd3e10e8 - .quad 0x05fabe670,0x1a2adb74e - .quad 0x130004488,0x0de87806c - .quad 0x000bcf5f6,0x19d34af3a - .quad 0x18f0c7078,0x014338754 - .quad 0x017f27698,0x049c3cc9c - .quad 0x058ca5f00,0x15e3e77ee - .quad 0x1af900c24,0x068bce87a - .quad 0x0b5cfca28,0x0dd07448e - .quad 0x0ded288f8,0x1524fa6c6 - .quad 0x059f229bc,0x1d8048348 - .quad 0x06d390dec,0x16cba8aca - .quad 0x037170390,0x0a3e3e02c - .quad 0x06353c1cc,0x042d98888 - .quad 0x0c4584f5c,0x0d73c7bea - .quad 0x1f16a3418,0x1329d9f7e - .quad 0x0531377e2,0x185137662 - .quad 0x1d8d9ca7c,0x1b1c69528 - .quad 0x0b25b29f2,0x18a08b5bc - .quad 0x19fb2a8b0,0x02178513a - .quad 0x1a08fe6ac,0x1da758ae0 - .quad 0x045cddf4e,0x0e0ac139e - .quad 0x1a91647f2,0x169cf9eb0 - .quad 0x1a0f717c4,0x0170076fa + .long 0x493c7d27, 0x00000001 + .long 0xba4fc28e, 0x493c7d27 + .long 0xddc0152b, 0xf20c0dfe + .long 0x9e4addf8, 0xba4fc28e + .long 0x39d3b296, 0x3da6d0cb + .long 0x0715ce53, 0xddc0152b + .long 0x47db8317, 0x1c291d04 + .long 0x0d3b6092, 0x9e4addf8 + .long 0xc96cfdc0, 0x740eef02 + .long 0x878a92a7, 0x39d3b296 + .long 0xdaece73e, 0x083a6eec + .long 0xab7aff2a, 0x0715ce53 + .long 0x2162d385, 0xc49f4f67 + .long 0x83348832, 0x47db8317 + .long 0x299847d5, 0x2ad91c30 + .long 0xb9e02b86, 0x0d3b6092 + .long 0x18b33a4e, 0x6992cea2 + .long 0xb6dd949b, 0xc96cfdc0 + .long 0x78d9ccb7, 0x7e908048 + .long 0xbac2fd7b, 0x878a92a7 + .long 0xa60ce07b, 0x1b3d8f29 + .long 0xce7f39f4, 0xdaece73e + .long 0x61d82e56, 0xf1d0f55e + .long 0xd270f1a2, 0xab7aff2a + .long 0xc619809d, 0xa87ab8a8 + .long 0x2b3cac5d, 0x2162d385 + .long 0x65863b64, 0x8462d800 + .long 0x1b03397f, 0x83348832 + .long 0xebb883bd, 0x71d111a8 + .long 0xb3e32c28, 0x299847d5 + .long 0x064f7f26, 0xffd852c6 + .long 0xdd7e3b0c, 0xb9e02b86 + .long 0xf285651c, 0xdcb17aa4 + .long 0x10746f3c, 0x18b33a4e + .long 0xc7a68855, 0xf37c5aee + .long 0x271d9844, 0xb6dd949b + .long 0x8e766a0c, 0x6051d5a2 + .long 0x93a5f730, 0x78d9ccb7 + .long 0x6cb08e5c, 0x18b0d4ff + .long 0x6b749fb2, 0xbac2fd7b + .long 0x1393e203, 0x21f3d99c + .long 0xcec3662e, 0xa60ce07b + .long 0x96c515bb, 0x8f158014 + .long 0xe6fc4e6a, 0xce7f39f4 + .long 0x8227bb8a, 0xa00457f7 + .long 0xb0cd4768, 0x61d82e56 + .long 0x39c7ff35, 0x8d6d2c43 + .long 0xd7a4825c, 0xd270f1a2 + .long 0x0ab3844b, 0x00ac29cf + .long 0x0167d312, 0xc619809d + .long 0xf6076544, 0xe9adf796 + .long 0x26f6a60a, 0x2b3cac5d + .long 0xa741c1bf, 0x96638b34 + .long 0x98d8d9cb, 0x65863b64 + .long 0x49c3cc9c, 0xe0e9f351 + .long 0x68bce87a, 0x1b03397f + 
.long 0x57a3d037, 0x9af01f2d + .long 0x6956fc3b, 0xebb883bd + .long 0x42d98888, 0x2cff42cf + .long 0x3771e98f, 0xb3e32c28 + .long 0xb42ae3d9, 0x88f25a3a + .long 0x2178513a, 0x064f7f26 + .long 0xe0ac139e, 0x4e36f0b0 + .long 0x170076fa, 0xdd7e3b0c + .long 0x444dd413, 0xbd6f81f8 + .long 0x6f345e45, 0xf285651c + .long 0x41d17b64, 0x91c9bd4b + .long 0xff0dba97, 0x10746f3c + .long 0xa2b73df1, 0x885f087b + .long 0xf872e54c, 0xc7a68855 + .long 0x1e41e9fc, 0x4c144932 + .long 0x86d8e4d2, 0x271d9844 + .long 0x651bd98b, 0x52148f02 + .long 0x5bb8f1bc, 0x8e766a0c + .long 0xa90fd27a, 0xa3c6f37a + .long 0xb3af077a, 0x93a5f730 + .long 0x4984d782, 0xd7c0557f + .long 0xca6ef3ac, 0x6cb08e5c + .long 0x234e0b26, 0x63ded06a + .long 0xdd66cbbb, 0x6b749fb2 + .long 0x4597456a, 0x4d56973c + .long 0xe9e28eb4, 0x1393e203 + .long 0x7b3ff57a, 0x9669c9df + .long 0xc9c8b782, 0xcec3662e + .long 0x3f70cc6f, 0xe417f38a + .long 0x93e106a4, 0x96c515bb + .long 0x62ec6c6d, 0x4b9e0f71 + .long 0xd813b325, 0xe6fc4e6a + .long 0x0df04680, 0xd104b8fc + .long 0x2342001e, 0x8227bb8a + .long 0x0a2a8d7e, 0x5b397730 + .long 0x6d9a4957, 0xb0cd4768 + .long 0xe8b6368b, 0xe78eb416 + .long 0xd2c3ed1a, 0x39c7ff35 + .long 0x995a5724, 0x61ff0e01 + .long 0x9ef68d35, 0xd7a4825c + .long 0x0c139b31, 0x8d96551c + .long 0xf2271e60, 0x0ab3844b + .long 0x0b0bf8ca, 0x0bf80dd2 + .long 0x2664fd8b, 0x0167d312 + .long 0xed64812d, 0x8821abed + .long 0x02ee03b2, 0xf6076544 + .long 0x8604ae0f, 0x6a45d2b2 + .long 0x363bd6b3, 0x26f6a60a + .long 0x135c83fd, 0xd8d26619 + .long 0x5fabe670, 0xa741c1bf + .long 0x35ec3279, 0xde87806c + .long 0x00bcf5f6, 0x98d8d9cb + .long 0x8ae00689, 0x14338754 + .long 0x17f27698, 0x49c3cc9c + .long 0x58ca5f00, 0x5bd2011f + .long 0xaa7c7ad5, 0x68bce87a + .long 0xb5cfca28, 0xdd07448e + .long 0xded288f8, 0x57a3d037 + .long 0x59f229bc, 0xdde8f5b9 + .long 0x6d390dec, 0x6956fc3b + .long 0x37170390, 0xa3e3e02c + .long 0x6353c1cc, 0x42d98888 + .long 0xc4584f5c, 0xd73c7bea + .long 0xf48642e9, 0x3771e98f + .long 0x531377e2, 0x80ff0093 + .long 0xdd35bc8d, 0xb42ae3d9 + .long 0xb25b29f2, 0x8fe4c34d + .long 0x9a5ede41, 0x2178513a + .long 0xa563905d, 0xdf99fc11 + .long 0x45cddf4e, 0xe0ac139e + .long 0xacfa3103, 0x6c23e841 + .long 0xa51b6135, 0x170076fa diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S new file mode 100644 index 0000000..038f6ae --- /dev/null +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -0,0 +1,805 @@ +/* + * des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher + * + * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/linkage.h> + +.file "des3_ede-asm_64.S" +.text + +#define s1 .L_s1 +#define s2 ((s1) + (64*8)) +#define s3 ((s2) + (64*8)) +#define s4 ((s3) + (64*8)) +#define s5 ((s4) + (64*8)) +#define s6 ((s5) + (64*8)) +#define s7 ((s6) + (64*8)) +#define s8 ((s7) + (64*8)) + +/* register macros */ +#define CTX %rdi + +#define RL0 %r8 +#define RL1 %r9 +#define RL2 %r10 + +#define RL0d %r8d +#define RL1d %r9d +#define RL2d %r10d + +#define RR0 %r11 +#define RR1 %r12 +#define RR2 %r13 + +#define RR0d %r11d +#define RR1d %r12d +#define RR2d %r13d + +#define RW0 %rax +#define RW1 %rbx +#define RW2 %rcx + +#define RW0d %eax +#define RW1d %ebx +#define RW2d %ecx + +#define RW0bl %al +#define RW1bl %bl +#define RW2bl %cl + +#define RW0bh %ah +#define RW1bh %bh +#define RW2bh %ch + +#define RT0 %r15 +#define RT1 %rbp +#define RT2 %r14 +#define RT3 %rdx + +#define RT0d %r15d +#define RT1d %ebp +#define RT2d %r14d +#define RT3d %edx + +/*********************************************************************** + * 1-way 3DES + ***********************************************************************/ +#define do_permutation(a, b, offset, mask) \ + movl a, RT0d; \ + shrl $(offset), RT0d; \ + xorl b, RT0d; \ + andl $(mask), RT0d; \ + xorl RT0d, b; \ + shll $(offset), RT0d; \ + xorl RT0d, a; + +#define expand_to_64bits(val, mask) \ + movl val##d, RT0d; \ + rorl $4, RT0d; \ + shlq $32, RT0; \ + orq RT0, val; \ + andq mask, val; + +#define compress_to_64bits(val) \ + movq val, RT0; \ + shrq $32, RT0; \ + roll $4, RT0d; \ + orl RT0d, val##d; + +#define initial_permutation(left, right) \ + do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \ + do_permutation(left##d, right##d, 16, 0x0000ffff); \ + do_permutation(right##d, left##d, 2, 0x33333333); \ + do_permutation(right##d, left##d, 8, 0x00ff00ff); \ + movabs $0x3f3f3f3f3f3f3f3f, RT3; \ + movl left##d, RW0d; \ + roll $1, right##d; \ + xorl right##d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, left##d; \ + xorl RW0d, right##d; \ + roll $1, left##d; \ + expand_to_64bits(right, RT3); \ + expand_to_64bits(left, RT3); + +#define final_permutation(left, right) \ + compress_to_64bits(right); \ + compress_to_64bits(left); \ + movl right##d, RW0d; \ + rorl $1, left##d; \ + xorl left##d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, right##d; \ + xorl RW0d, left##d; \ + rorl $1, right##d; \ + do_permutation(right##d, left##d, 8, 0x00ff00ff); \ + do_permutation(right##d, left##d, 2, 0x33333333); \ + do_permutation(left##d, right##d, 16, 0x0000ffff); \ + do_permutation(left##d, right##d, 4, 0x0f0f0f0f); + +#define round1(n, from, to, load_next_key) \ + xorq from, RW0; \ + \ + movzbl RW0bl, RT0d; \ + movzbl RW0bh, RT1d; \ + shrq $16, RW0; \ + movzbl RW0bl, RT2d; \ + movzbl RW0bh, RT3d; \ + shrq $16, RW0; \ + movq s8(, RT0, 8), RT0; \ + xorq s6(, RT1, 8), to; \ + movzbl RW0bl, RL1d; \ + movzbl RW0bh, RT1d; \ + shrl $16, RW0d; \ + xorq s4(, RT2, 8), RT0; \ + xorq s2(, RT3, 8), to; \ + movzbl RW0bl, RT2d; \ + movzbl RW0bh, RT3d; \ + xorq s7(, RL1, 8), RT0; \ + xorq s5(, RT1, 8), to; \ + xorq s3(, RT2, 8), RT0; \ + load_next_key(n, RW0); \ + xorq RT0, to; \ + xorq s1(, RT3, 8), to; \ + +#define load_next_key(n, RWx) \ + movq (((n) + 1) * 8)(CTX), RWx; + +#define dummy2(a, b) /*_*/ + +#define read_block(io, left, right) \ + movl (io), left##d; \ + movl 4(io), right##d; \ + bswapl left##d; \ + bswapl right##d; + +#define write_block(io, left, right) \ + bswapl left##d; \ + bswapl right##d; \ + movl left##d, (io); \ + movl right##d, 4(io); + 
+ENTRY(des3_ede_x86_64_crypt_blk) + /* input: + * %rdi: round keys, CTX + * %rsi: dst + * %rdx: src + */ + pushq %rbp; + pushq %rbx; + pushq %r12; + pushq %r13; + pushq %r14; + pushq %r15; + + read_block(%rdx, RL0, RR0); + initial_permutation(RL0, RR0); + + movq (CTX), RW0; + + round1(0, RR0, RL0, load_next_key); + round1(1, RL0, RR0, load_next_key); + round1(2, RR0, RL0, load_next_key); + round1(3, RL0, RR0, load_next_key); + round1(4, RR0, RL0, load_next_key); + round1(5, RL0, RR0, load_next_key); + round1(6, RR0, RL0, load_next_key); + round1(7, RL0, RR0, load_next_key); + round1(8, RR0, RL0, load_next_key); + round1(9, RL0, RR0, load_next_key); + round1(10, RR0, RL0, load_next_key); + round1(11, RL0, RR0, load_next_key); + round1(12, RR0, RL0, load_next_key); + round1(13, RL0, RR0, load_next_key); + round1(14, RR0, RL0, load_next_key); + round1(15, RL0, RR0, load_next_key); + + round1(16+0, RL0, RR0, load_next_key); + round1(16+1, RR0, RL0, load_next_key); + round1(16+2, RL0, RR0, load_next_key); + round1(16+3, RR0, RL0, load_next_key); + round1(16+4, RL0, RR0, load_next_key); + round1(16+5, RR0, RL0, load_next_key); + round1(16+6, RL0, RR0, load_next_key); + round1(16+7, RR0, RL0, load_next_key); + round1(16+8, RL0, RR0, load_next_key); + round1(16+9, RR0, RL0, load_next_key); + round1(16+10, RL0, RR0, load_next_key); + round1(16+11, RR0, RL0, load_next_key); + round1(16+12, RL0, RR0, load_next_key); + round1(16+13, RR0, RL0, load_next_key); + round1(16+14, RL0, RR0, load_next_key); + round1(16+15, RR0, RL0, load_next_key); + + round1(32+0, RR0, RL0, load_next_key); + round1(32+1, RL0, RR0, load_next_key); + round1(32+2, RR0, RL0, load_next_key); + round1(32+3, RL0, RR0, load_next_key); + round1(32+4, RR0, RL0, load_next_key); + round1(32+5, RL0, RR0, load_next_key); + round1(32+6, RR0, RL0, load_next_key); + round1(32+7, RL0, RR0, load_next_key); + round1(32+8, RR0, RL0, load_next_key); + round1(32+9, RL0, RR0, load_next_key); + round1(32+10, RR0, RL0, load_next_key); + round1(32+11, RL0, RR0, load_next_key); + round1(32+12, RR0, RL0, load_next_key); + round1(32+13, RL0, RR0, load_next_key); + round1(32+14, RR0, RL0, load_next_key); + round1(32+15, RL0, RR0, dummy2); + + final_permutation(RR0, RL0); + write_block(%rsi, RR0, RL0); + + popq %r15; + popq %r14; + popq %r13; + popq %r12; + popq %rbx; + popq %rbp; + + ret; +ENDPROC(des3_ede_x86_64_crypt_blk) + +/*********************************************************************** + * 3-way 3DES + ***********************************************************************/ +#define expand_to_64bits(val, mask) \ + movl val##d, RT0d; \ + rorl $4, RT0d; \ + shlq $32, RT0; \ + orq RT0, val; \ + andq mask, val; + +#define compress_to_64bits(val) \ + movq val, RT0; \ + shrq $32, RT0; \ + roll $4, RT0d; \ + orl RT0d, val##d; + +#define initial_permutation3(left, right) \ + do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \ + do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ + do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ + do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ + do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \ + do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ + \ + do_permutation(right##0d, left##0d, 2, 0x33333333); \ + do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \ + do_permutation(right##1d, left##1d, 2, 0x33333333); \ + do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ + do_permutation(right##2d, left##2d, 2, 0x33333333); \ + do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ + \ + 
movabs $0x3f3f3f3f3f3f3f3f, RT3; \ + \ + movl left##0d, RW0d; \ + roll $1, right##0d; \ + xorl right##0d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, left##0d; \ + xorl RW0d, right##0d; \ + roll $1, left##0d; \ + expand_to_64bits(right##0, RT3); \ + expand_to_64bits(left##0, RT3); \ + movl left##1d, RW1d; \ + roll $1, right##1d; \ + xorl right##1d, RW1d; \ + andl $0xaaaaaaaa, RW1d; \ + xorl RW1d, left##1d; \ + xorl RW1d, right##1d; \ + roll $1, left##1d; \ + expand_to_64bits(right##1, RT3); \ + expand_to_64bits(left##1, RT3); \ + movl left##2d, RW2d; \ + roll $1, right##2d; \ + xorl right##2d, RW2d; \ + andl $0xaaaaaaaa, RW2d; \ + xorl RW2d, left##2d; \ + xorl RW2d, right##2d; \ + roll $1, left##2d; \ + expand_to_64bits(right##2, RT3); \ + expand_to_64bits(left##2, RT3); + +#define final_permutation3(left, right) \ + compress_to_64bits(right##0); \ + compress_to_64bits(left##0); \ + movl right##0d, RW0d; \ + rorl $1, left##0d; \ + xorl left##0d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, right##0d; \ + xorl RW0d, left##0d; \ + rorl $1, right##0d; \ + compress_to_64bits(right##1); \ + compress_to_64bits(left##1); \ + movl right##1d, RW1d; \ + rorl $1, left##1d; \ + xorl left##1d, RW1d; \ + andl $0xaaaaaaaa, RW1d; \ + xorl RW1d, right##1d; \ + xorl RW1d, left##1d; \ + rorl $1, right##1d; \ + compress_to_64bits(right##2); \ + compress_to_64bits(left##2); \ + movl right##2d, RW2d; \ + rorl $1, left##2d; \ + xorl left##2d, RW2d; \ + andl $0xaaaaaaaa, RW2d; \ + xorl RW2d, right##2d; \ + xorl RW2d, left##2d; \ + rorl $1, right##2d; \ + \ + do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \ + do_permutation(right##0d, left##0d, 2, 0x33333333); \ + do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ + do_permutation(right##1d, left##1d, 2, 0x33333333); \ + do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ + do_permutation(right##2d, left##2d, 2, 0x33333333); \ + \ + do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ + do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \ + do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ + do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ + do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ + do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); + +#define round3(n, from, to, load_next_key, do_movq) \ + xorq from##0, RW0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + shrq $16, RW0; \ + xorq s8(, RT3, 8), to##0; \ + xorq s6(, RT1, 8), to##0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + shrq $16, RW0; \ + xorq s4(, RT3, 8), to##0; \ + xorq s2(, RT1, 8), to##0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + shrl $16, RW0d; \ + xorq s7(, RT3, 8), to##0; \ + xorq s5(, RT1, 8), to##0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + load_next_key(n, RW0); \ + xorq s3(, RT3, 8), to##0; \ + xorq s1(, RT1, 8), to##0; \ + xorq from##1, RW1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + shrq $16, RW1; \ + xorq s8(, RT3, 8), to##1; \ + xorq s6(, RT1, 8), to##1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + shrq $16, RW1; \ + xorq s4(, RT3, 8), to##1; \ + xorq s2(, RT1, 8), to##1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + shrl $16, RW1d; \ + xorq s7(, RT3, 8), to##1; \ + xorq s5(, RT1, 8), to##1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + do_movq(RW0, RW1); \ + xorq s3(, RT3, 8), to##1; \ + xorq s1(, RT1, 8), to##1; \ + xorq from##2, RW2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + shrq $16, RW2; \ + xorq s8(, RT3, 8), to##2; \ + xorq s6(, RT1, 8), to##2; \ + movzbl RW2bl, RT3d; \ + 
movzbl RW2bh, RT1d; \ + shrq $16, RW2; \ + xorq s4(, RT3, 8), to##2; \ + xorq s2(, RT1, 8), to##2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + shrl $16, RW2d; \ + xorq s7(, RT3, 8), to##2; \ + xorq s5(, RT1, 8), to##2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + do_movq(RW0, RW2); \ + xorq s3(, RT3, 8), to##2; \ + xorq s1(, RT1, 8), to##2; + +#define __movq(src, dst) \ + movq src, dst; + +ENTRY(des3_ede_x86_64_crypt_blk_3way) + /* input: + * %rdi: ctx, round keys + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + */ + + pushq %rbp; + pushq %rbx; + pushq %r12; + pushq %r13; + pushq %r14; + pushq %r15; + + /* load input */ + movl 0 * 4(%rdx), RL0d; + movl 1 * 4(%rdx), RR0d; + movl 2 * 4(%rdx), RL1d; + movl 3 * 4(%rdx), RR1d; + movl 4 * 4(%rdx), RL2d; + movl 5 * 4(%rdx), RR2d; + + bswapl RL0d; + bswapl RR0d; + bswapl RL1d; + bswapl RR1d; + bswapl RL2d; + bswapl RR2d; + + initial_permutation3(RL, RR); + + movq 0(CTX), RW0; + movq RW0, RW1; + movq RW0, RW2; + + round3(0, RR, RL, load_next_key, __movq); + round3(1, RL, RR, load_next_key, __movq); + round3(2, RR, RL, load_next_key, __movq); + round3(3, RL, RR, load_next_key, __movq); + round3(4, RR, RL, load_next_key, __movq); + round3(5, RL, RR, load_next_key, __movq); + round3(6, RR, RL, load_next_key, __movq); + round3(7, RL, RR, load_next_key, __movq); + round3(8, RR, RL, load_next_key, __movq); + round3(9, RL, RR, load_next_key, __movq); + round3(10, RR, RL, load_next_key, __movq); + round3(11, RL, RR, load_next_key, __movq); + round3(12, RR, RL, load_next_key, __movq); + round3(13, RL, RR, load_next_key, __movq); + round3(14, RR, RL, load_next_key, __movq); + round3(15, RL, RR, load_next_key, __movq); + + round3(16+0, RL, RR, load_next_key, __movq); + round3(16+1, RR, RL, load_next_key, __movq); + round3(16+2, RL, RR, load_next_key, __movq); + round3(16+3, RR, RL, load_next_key, __movq); + round3(16+4, RL, RR, load_next_key, __movq); + round3(16+5, RR, RL, load_next_key, __movq); + round3(16+6, RL, RR, load_next_key, __movq); + round3(16+7, RR, RL, load_next_key, __movq); + round3(16+8, RL, RR, load_next_key, __movq); + round3(16+9, RR, RL, load_next_key, __movq); + round3(16+10, RL, RR, load_next_key, __movq); + round3(16+11, RR, RL, load_next_key, __movq); + round3(16+12, RL, RR, load_next_key, __movq); + round3(16+13, RR, RL, load_next_key, __movq); + round3(16+14, RL, RR, load_next_key, __movq); + round3(16+15, RR, RL, load_next_key, __movq); + + round3(32+0, RR, RL, load_next_key, __movq); + round3(32+1, RL, RR, load_next_key, __movq); + round3(32+2, RR, RL, load_next_key, __movq); + round3(32+3, RL, RR, load_next_key, __movq); + round3(32+4, RR, RL, load_next_key, __movq); + round3(32+5, RL, RR, load_next_key, __movq); + round3(32+6, RR, RL, load_next_key, __movq); + round3(32+7, RL, RR, load_next_key, __movq); + round3(32+8, RR, RL, load_next_key, __movq); + round3(32+9, RL, RR, load_next_key, __movq); + round3(32+10, RR, RL, load_next_key, __movq); + round3(32+11, RL, RR, load_next_key, __movq); + round3(32+12, RR, RL, load_next_key, __movq); + round3(32+13, RL, RR, load_next_key, __movq); + round3(32+14, RR, RL, load_next_key, __movq); + round3(32+15, RL, RR, dummy2, dummy2); + + final_permutation3(RR, RL); + + bswapl RR0d; + bswapl RL0d; + bswapl RR1d; + bswapl RL1d; + bswapl RR2d; + bswapl RL2d; + + movl RR0d, 0 * 4(%rsi); + movl RL0d, 1 * 4(%rsi); + movl RR1d, 2 * 4(%rsi); + movl RL1d, 3 * 4(%rsi); + movl RR2d, 4 * 4(%rsi); + movl RL2d, 5 * 4(%rsi); + + popq %r15; + popq %r14; + popq %r13; + popq %r12; + popq 
%rbx; + popq %rbp; + + ret; +ENDPROC(des3_ede_x86_64_crypt_blk_3way) + +.data +.align 16 +.L_s1: + .quad 0x0010100001010400, 0x0000000000000000 + .quad 0x0000100000010000, 0x0010100001010404 + .quad 0x0010100001010004, 0x0000100000010404 + .quad 0x0000000000000004, 0x0000100000010000 + .quad 0x0000000000000400, 0x0010100001010400 + .quad 0x0010100001010404, 0x0000000000000400 + .quad 0x0010000001000404, 0x0010100001010004 + .quad 0x0010000001000000, 0x0000000000000004 + .quad 0x0000000000000404, 0x0010000001000400 + .quad 0x0010000001000400, 0x0000100000010400 + .quad 0x0000100000010400, 0x0010100001010000 + .quad 0x0010100001010000, 0x0010000001000404 + .quad 0x0000100000010004, 0x0010000001000004 + .quad 0x0010000001000004, 0x0000100000010004 + .quad 0x0000000000000000, 0x0000000000000404 + .quad 0x0000100000010404, 0x0010000001000000 + .quad 0x0000100000010000, 0x0010100001010404 + .quad 0x0000000000000004, 0x0010100001010000 + .quad 0x0010100001010400, 0x0010000001000000 + .quad 0x0010000001000000, 0x0000000000000400 + .quad 0x0010100001010004, 0x0000100000010000 + .quad 0x0000100000010400, 0x0010000001000004 + .quad 0x0000000000000400, 0x0000000000000004 + .quad 0x0010000001000404, 0x0000100000010404 + .quad 0x0010100001010404, 0x0000100000010004 + .quad 0x0010100001010000, 0x0010000001000404 + .quad 0x0010000001000004, 0x0000000000000404 + .quad 0x0000100000010404, 0x0010100001010400 + .quad 0x0000000000000404, 0x0010000001000400 + .quad 0x0010000001000400, 0x0000000000000000 + .quad 0x0000100000010004, 0x0000100000010400 + .quad 0x0000000000000000, 0x0010100001010004 +.L_s2: + .quad 0x0801080200100020, 0x0800080000000000 + .quad 0x0000080000000000, 0x0001080200100020 + .quad 0x0001000000100000, 0x0000000200000020 + .quad 0x0801000200100020, 0x0800080200000020 + .quad 0x0800000200000020, 0x0801080200100020 + .quad 0x0801080000100000, 0x0800000000000000 + .quad 0x0800080000000000, 0x0001000000100000 + .quad 0x0000000200000020, 0x0801000200100020 + .quad 0x0001080000100000, 0x0001000200100020 + .quad 0x0800080200000020, 0x0000000000000000 + .quad 0x0800000000000000, 0x0000080000000000 + .quad 0x0001080200100020, 0x0801000000100000 + .quad 0x0001000200100020, 0x0800000200000020 + .quad 0x0000000000000000, 0x0001080000100000 + .quad 0x0000080200000020, 0x0801080000100000 + .quad 0x0801000000100000, 0x0000080200000020 + .quad 0x0000000000000000, 0x0001080200100020 + .quad 0x0801000200100020, 0x0001000000100000 + .quad 0x0800080200000020, 0x0801000000100000 + .quad 0x0801080000100000, 0x0000080000000000 + .quad 0x0801000000100000, 0x0800080000000000 + .quad 0x0000000200000020, 0x0801080200100020 + .quad 0x0001080200100020, 0x0000000200000020 + .quad 0x0000080000000000, 0x0800000000000000 + .quad 0x0000080200000020, 0x0801080000100000 + .quad 0x0001000000100000, 0x0800000200000020 + .quad 0x0001000200100020, 0x0800080200000020 + .quad 0x0800000200000020, 0x0001000200100020 + .quad 0x0001080000100000, 0x0000000000000000 + .quad 0x0800080000000000, 0x0000080200000020 + .quad 0x0800000000000000, 0x0801000200100020 + .quad 0x0801080200100020, 0x0001080000100000 +.L_s3: + .quad 0x0000002000000208, 0x0000202008020200 + .quad 0x0000000000000000, 0x0000200008020008 + .quad 0x0000002008000200, 0x0000000000000000 + .quad 0x0000202000020208, 0x0000002008000200 + .quad 0x0000200000020008, 0x0000000008000008 + .quad 0x0000000008000008, 0x0000200000020000 + .quad 0x0000202008020208, 0x0000200000020008 + .quad 0x0000200008020000, 0x0000002000000208 + .quad 0x0000000008000000, 0x0000000000000008 + .quad 
0x0000202008020200, 0x0000002000000200 + .quad 0x0000202000020200, 0x0000200008020000 + .quad 0x0000200008020008, 0x0000202000020208 + .quad 0x0000002008000208, 0x0000202000020200 + .quad 0x0000200000020000, 0x0000002008000208 + .quad 0x0000000000000008, 0x0000202008020208 + .quad 0x0000002000000200, 0x0000000008000000 + .quad 0x0000202008020200, 0x0000000008000000 + .quad 0x0000200000020008, 0x0000002000000208 + .quad 0x0000200000020000, 0x0000202008020200 + .quad 0x0000002008000200, 0x0000000000000000 + .quad 0x0000002000000200, 0x0000200000020008 + .quad 0x0000202008020208, 0x0000002008000200 + .quad 0x0000000008000008, 0x0000002000000200 + .quad 0x0000000000000000, 0x0000200008020008 + .quad 0x0000002008000208, 0x0000200000020000 + .quad 0x0000000008000000, 0x0000202008020208 + .quad 0x0000000000000008, 0x0000202000020208 + .quad 0x0000202000020200, 0x0000000008000008 + .quad 0x0000200008020000, 0x0000002008000208 + .quad 0x0000002000000208, 0x0000200008020000 + .quad 0x0000202000020208, 0x0000000000000008 + .quad 0x0000200008020008, 0x0000202000020200 +.L_s4: + .quad 0x1008020000002001, 0x1000020800002001 + .quad 0x1000020800002001, 0x0000000800000000 + .quad 0x0008020800002000, 0x1008000800000001 + .quad 0x1008000000000001, 0x1000020000002001 + .quad 0x0000000000000000, 0x0008020000002000 + .quad 0x0008020000002000, 0x1008020800002001 + .quad 0x1000000800000001, 0x0000000000000000 + .quad 0x0008000800000000, 0x1008000000000001 + .quad 0x1000000000000001, 0x0000020000002000 + .quad 0x0008000000000000, 0x1008020000002001 + .quad 0x0000000800000000, 0x0008000000000000 + .quad 0x1000020000002001, 0x0000020800002000 + .quad 0x1008000800000001, 0x1000000000000001 + .quad 0x0000020800002000, 0x0008000800000000 + .quad 0x0000020000002000, 0x0008020800002000 + .quad 0x1008020800002001, 0x1000000800000001 + .quad 0x0008000800000000, 0x1008000000000001 + .quad 0x0008020000002000, 0x1008020800002001 + .quad 0x1000000800000001, 0x0000000000000000 + .quad 0x0000000000000000, 0x0008020000002000 + .quad 0x0000020800002000, 0x0008000800000000 + .quad 0x1008000800000001, 0x1000000000000001 + .quad 0x1008020000002001, 0x1000020800002001 + .quad 0x1000020800002001, 0x0000000800000000 + .quad 0x1008020800002001, 0x1000000800000001 + .quad 0x1000000000000001, 0x0000020000002000 + .quad 0x1008000000000001, 0x1000020000002001 + .quad 0x0008020800002000, 0x1008000800000001 + .quad 0x1000020000002001, 0x0000020800002000 + .quad 0x0008000000000000, 0x1008020000002001 + .quad 0x0000000800000000, 0x0008000000000000 + .quad 0x0000020000002000, 0x0008020800002000 +.L_s5: + .quad 0x0000001000000100, 0x0020001002080100 + .quad 0x0020000002080000, 0x0420001002000100 + .quad 0x0000000000080000, 0x0000001000000100 + .quad 0x0400000000000000, 0x0020000002080000 + .quad 0x0400001000080100, 0x0000000000080000 + .quad 0x0020001002000100, 0x0400001000080100 + .quad 0x0420001002000100, 0x0420000002080000 + .quad 0x0000001000080100, 0x0400000000000000 + .quad 0x0020000002000000, 0x0400000000080000 + .quad 0x0400000000080000, 0x0000000000000000 + .quad 0x0400001000000100, 0x0420001002080100 + .quad 0x0420001002080100, 0x0020001002000100 + .quad 0x0420000002080000, 0x0400001000000100 + .quad 0x0000000000000000, 0x0420000002000000 + .quad 0x0020001002080100, 0x0020000002000000 + .quad 0x0420000002000000, 0x0000001000080100 + .quad 0x0000000000080000, 0x0420001002000100 + .quad 0x0000001000000100, 0x0020000002000000 + .quad 0x0400000000000000, 0x0020000002080000 + .quad 0x0420001002000100, 0x0400001000080100 + .quad 
0x0020001002000100, 0x0400000000000000 + .quad 0x0420000002080000, 0x0020001002080100 + .quad 0x0400001000080100, 0x0000001000000100 + .quad 0x0020000002000000, 0x0420000002080000 + .quad 0x0420001002080100, 0x0000001000080100 + .quad 0x0420000002000000, 0x0420001002080100 + .quad 0x0020000002080000, 0x0000000000000000 + .quad 0x0400000000080000, 0x0420000002000000 + .quad 0x0000001000080100, 0x0020001002000100 + .quad 0x0400001000000100, 0x0000000000080000 + .quad 0x0000000000000000, 0x0400000000080000 + .quad 0x0020001002080100, 0x0400001000000100 +.L_s6: + .quad 0x0200000120000010, 0x0204000020000000 + .quad 0x0000040000000000, 0x0204040120000010 + .quad 0x0204000020000000, 0x0000000100000010 + .quad 0x0204040120000010, 0x0004000000000000 + .quad 0x0200040020000000, 0x0004040100000010 + .quad 0x0004000000000000, 0x0200000120000010 + .quad 0x0004000100000010, 0x0200040020000000 + .quad 0x0200000020000000, 0x0000040100000010 + .quad 0x0000000000000000, 0x0004000100000010 + .quad 0x0200040120000010, 0x0000040000000000 + .quad 0x0004040000000000, 0x0200040120000010 + .quad 0x0000000100000010, 0x0204000120000010 + .quad 0x0204000120000010, 0x0000000000000000 + .quad 0x0004040100000010, 0x0204040020000000 + .quad 0x0000040100000010, 0x0004040000000000 + .quad 0x0204040020000000, 0x0200000020000000 + .quad 0x0200040020000000, 0x0000000100000010 + .quad 0x0204000120000010, 0x0004040000000000 + .quad 0x0204040120000010, 0x0004000000000000 + .quad 0x0000040100000010, 0x0200000120000010 + .quad 0x0004000000000000, 0x0200040020000000 + .quad 0x0200000020000000, 0x0000040100000010 + .quad 0x0200000120000010, 0x0204040120000010 + .quad 0x0004040000000000, 0x0204000020000000 + .quad 0x0004040100000010, 0x0204040020000000 + .quad 0x0000000000000000, 0x0204000120000010 + .quad 0x0000000100000010, 0x0000040000000000 + .quad 0x0204000020000000, 0x0004040100000010 + .quad 0x0000040000000000, 0x0004000100000010 + .quad 0x0200040120000010, 0x0000000000000000 + .quad 0x0204040020000000, 0x0200000020000000 + .quad 0x0004000100000010, 0x0200040120000010 +.L_s7: + .quad 0x0002000000200000, 0x2002000004200002 + .quad 0x2000000004000802, 0x0000000000000000 + .quad 0x0000000000000800, 0x2000000004000802 + .quad 0x2002000000200802, 0x0002000004200800 + .quad 0x2002000004200802, 0x0002000000200000 + .quad 0x0000000000000000, 0x2000000004000002 + .quad 0x2000000000000002, 0x0000000004000000 + .quad 0x2002000004200002, 0x2000000000000802 + .quad 0x0000000004000800, 0x2002000000200802 + .quad 0x2002000000200002, 0x0000000004000800 + .quad 0x2000000004000002, 0x0002000004200000 + .quad 0x0002000004200800, 0x2002000000200002 + .quad 0x0002000004200000, 0x0000000000000800 + .quad 0x2000000000000802, 0x2002000004200802 + .quad 0x0002000000200800, 0x2000000000000002 + .quad 0x0000000004000000, 0x0002000000200800 + .quad 0x0000000004000000, 0x0002000000200800 + .quad 0x0002000000200000, 0x2000000004000802 + .quad 0x2000000004000802, 0x2002000004200002 + .quad 0x2002000004200002, 0x2000000000000002 + .quad 0x2002000000200002, 0x0000000004000000 + .quad 0x0000000004000800, 0x0002000000200000 + .quad 0x0002000004200800, 0x2000000000000802 + .quad 0x2002000000200802, 0x0002000004200800 + .quad 0x2000000000000802, 0x2000000004000002 + .quad 0x2002000004200802, 0x0002000004200000 + .quad 0x0002000000200800, 0x0000000000000000 + .quad 0x2000000000000002, 0x2002000004200802 + .quad 0x0000000000000000, 0x2002000000200802 + .quad 0x0002000004200000, 0x0000000000000800 + .quad 0x2000000004000002, 0x0000000004000800 + .quad 
0x0000000000000800, 0x2002000000200002 +.L_s8: + .quad 0x0100010410001000, 0x0000010000001000 + .quad 0x0000000000040000, 0x0100010410041000 + .quad 0x0100000010000000, 0x0100010410001000 + .quad 0x0000000400000000, 0x0100000010000000 + .quad 0x0000000400040000, 0x0100000010040000 + .quad 0x0100010410041000, 0x0000010000041000 + .quad 0x0100010010041000, 0x0000010400041000 + .quad 0x0000010000001000, 0x0000000400000000 + .quad 0x0100000010040000, 0x0100000410000000 + .quad 0x0100010010001000, 0x0000010400001000 + .quad 0x0000010000041000, 0x0000000400040000 + .quad 0x0100000410040000, 0x0100010010041000 + .quad 0x0000010400001000, 0x0000000000000000 + .quad 0x0000000000000000, 0x0100000410040000 + .quad 0x0100000410000000, 0x0100010010001000 + .quad 0x0000010400041000, 0x0000000000040000 + .quad 0x0000010400041000, 0x0000000000040000 + .quad 0x0100010010041000, 0x0000010000001000 + .quad 0x0000000400000000, 0x0100000410040000 + .quad 0x0000010000001000, 0x0000010400041000 + .quad 0x0100010010001000, 0x0000000400000000 + .quad 0x0100000410000000, 0x0100000010040000 + .quad 0x0100000410040000, 0x0100000010000000 + .quad 0x0000000000040000, 0x0100010410001000 + .quad 0x0000000000000000, 0x0100010410041000 + .quad 0x0000000400040000, 0x0100000410000000 + .quad 0x0100000010040000, 0x0100010010001000 + .quad 0x0100010410001000, 0x0000000000000000 + .quad 0x0100010410041000, 0x0000010000041000 + .quad 0x0000010000041000, 0x0000010400001000 + .quad 0x0000010400001000, 0x0000000400040000 + .quad 0x0100000010000000, 0x0100010010041000 diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c new file mode 100644 index 0000000..0e9c066 --- /dev/null +++ b/arch/x86/crypto/des3_ede_glue.c @@ -0,0 +1,509 @@ +/* + * Glue Code for assembler optimized version of 3DES + * + * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> + * + * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: + * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> + * CTR part based on code (crypto/ctr.c) by: + * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <asm/processor.h> +#include <crypto/des.h> +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/types.h> +#include <crypto/algapi.h> + +struct des3_ede_x86_ctx { + u32 enc_expkey[DES3_EDE_EXPKEY_WORDS]; + u32 dec_expkey[DES3_EDE_EXPKEY_WORDS]; +}; + +/* regular block cipher functions */ +asmlinkage void des3_ede_x86_64_crypt_blk(const u32 *expkey, u8 *dst, + const u8 *src); + +/* 3-way parallel cipher functions */ +asmlinkage void des3_ede_x86_64_crypt_blk_3way(const u32 *expkey, u8 *dst, + const u8 *src); + +static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, + const u8 *src) +{ + u32 *enc_ctx = ctx->enc_expkey; + + des3_ede_x86_64_crypt_blk(enc_ctx, dst, src); +} + +static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, + const u8 *src) +{ + u32 *dec_ctx = ctx->dec_expkey; + + des3_ede_x86_64_crypt_blk(dec_ctx, dst, src); +} + +static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, + const u8 *src) +{ + u32 *enc_ctx = ctx->enc_expkey; + + des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src); +} + +static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, + const u8 *src) +{ + u32 *dec_ctx = ctx->dec_expkey; + + des3_ede_x86_64_crypt_blk_3way(dec_ctx, dst, src); +} + +static void des3_ede_x86_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + des3_ede_enc_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + const u32 *expkey) +{ + unsigned int bsize = DES3_EDE_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + des3_ede_x86_64_crypt_blk_3way(expkey, wdst, + wsrc); + + wsrc += bsize * 3; + wdst += bsize * 3; + nbytes -= bsize * 3; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + des3_ede_x86_64_crypt_blk(expkey, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, ctx->enc_expkey); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, ctx->dec_expkey); +} + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = DES3_EDE_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 *iv = (u64 *)walk->iv; + + do { + *dst = *src ^ *iv; + des3_ede_enc_blk(ctx, (u8 *)dst, (u8 
*)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u64 *)walk->iv = *iv; + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = DES3_EDE_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ivs[3 - 1]; + u64 last_iv; + + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + nbytes -= bsize * 3 - bsize; + src -= 3 - 1; + dst -= 3 - 1; + + ivs[0] = src[0]; + ivs[1] = src[1]; + + des3_ede_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); + + dst[1] ^= ivs[0]; + dst[2] ^= ivs[1]; + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * 3); + } + + /* Handle leftovers */ + for (;;) { + des3_ede_dec_blk(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } + +done: + *dst ^= *(u64 *)walk->iv; + *(u64 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx, + struct blkcipher_walk *walk) +{ + u8 *ctrblk = walk->iv; + u8 keystream[DES3_EDE_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + des3_ede_enc_blk(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = DES3_EDE_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + __be64 *src = (__be64 *)walk->src.virt.addr; + __be64 *dst = (__be64 *)walk->dst.virt.addr; + u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); + __be64 ctrblocks[3]; + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + /* create ctrblks for parallel encrypt */ + ctrblocks[0] = cpu_to_be64(ctrblk++); + ctrblocks[1] = cpu_to_be64(ctrblk++); + ctrblocks[2] = cpu_to_be64(ctrblk++); + + des3_ede_enc_blk_3way(ctx, (u8 *)ctrblocks, + (u8 *)ctrblocks); + + dst[0] = src[0] ^ ctrblocks[0]; + dst[1] = src[1] ^ ctrblocks[1]; + dst[2] = src[2] ^ ctrblocks[2]; + + src += 3; + dst += 3; + } while ((nbytes -= bsize * 3) >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + ctrblocks[0] = 
cpu_to_be64(ctrblk++); + + des3_ede_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); + + dst[0] = src[0] ^ ctrblocks[0]; + + src += 1; + dst += 1; + } while ((nbytes -= bsize) >= bsize); + +done: + *(__be64 *)walk->iv = cpu_to_be64(ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, DES3_EDE_BLOCK_SIZE); + + while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) { + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + if (walk.nbytes) { + ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct des3_ede_x86_ctx *ctx = crypto_tfm_ctx(tfm); + u32 i, j, tmp; + int err; + + /* Generate encryption context using generic implementation. */ + err = __des3_ede_setkey(ctx->enc_expkey, &tfm->crt_flags, key, keylen); + if (err < 0) + return err; + + /* Fix encryption context for this implementation and form decryption + * context. */ + j = DES3_EDE_EXPKEY_WORDS - 2; + for (i = 0; i < DES3_EDE_EXPKEY_WORDS; i += 2, j -= 2) { + tmp = ror32(ctx->enc_expkey[i + 1], 4); + ctx->enc_expkey[i + 1] = tmp; + + ctx->dec_expkey[j + 0] = ctx->enc_expkey[i + 0]; + ctx->dec_expkey[j + 1] = tmp; + } + + return 0; +} + +static struct crypto_alg des3_ede_algs[4] = { { + .cra_name = "des3_ede", + .cra_driver_name = "des3_ede-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = DES3_EDE_KEY_SIZE, + .cia_max_keysize = DES3_EDE_KEY_SIZE, + .cia_setkey = des3_ede_x86_setkey, + .cia_encrypt = des3_ede_x86_encrypt, + .cia_decrypt = des3_ede_x86_decrypt, + } + } +}, { + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "ecb-des3_ede-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .setkey = des3_ede_x86_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "cbc-des3_ede-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .setkey = des3_ede_x86_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "ctr(des3_ede)", + .cra_driver_name = "ctr-des3_ede-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + 
.max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .setkey = des3_ede_x86_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +} }; + +static bool is_blacklisted_cpu(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + if (boot_cpu_data.x86 == 0x0f) { + /* + * On Pentium 4, des3_ede-x86_64 is slower than generic C + * implementation because use of 64bit rotates (which are really + * slow on P4). Therefore blacklist P4s. + */ + return true; + } + + return false; +} + +static int force; +module_param(force, int, 0); +MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); + +static int __init des3_ede_x86_init(void) +{ + if (!force && is_blacklisted_cpu()) { + pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n"); + return -ENODEV; + } + + return crypto_register_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs)); +} + +static void __exit des3_ede_x86_fini(void) +{ + crypto_unregister_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs)); +} + +module_init(des3_ede_x86_init); +module_exit(des3_ede_x86_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Triple DES EDE Cipher Algorithm, asm optimized"); +MODULE_ALIAS("des3_ede"); +MODULE_ALIAS("des3_ede-asm"); +MODULE_ALIAS("des"); +MODULE_ALIAS("des-asm"); +MODULE_AUTHOR("Jussi Kivilinna <jussi.kivilinna@iki.fi>"); diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index f30cd10..8626b03 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -141,7 +141,7 @@ static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) /* save number of bits */ bits[1] = cpu_to_be64(sctx->count[0] << 3); - bits[0] = cpu_to_be64(sctx->count[1] << 3) | sctx->count[0] >> 61; + bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); /* Pad out to 112 mod 128 and append length */ index = sctx->count[0] & 0x7f; diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index bba3cf8..0a8b519 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -129,7 +129,7 @@ static inline notrace unsigned long arch_local_irq_save(void) #define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */ -#define INTERRUPT_RETURN iretq +#define INTERRUPT_RETURN jmp native_iret #define USERGS_SYSRET64 \ swapgs; \ sysretq; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4931415..49205d0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -95,7 +95,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define KVM_REFILL_PAGES 25 #define KVM_MAX_CPUID_ENTRIES 80 #define KVM_NR_FIXED_MTRR_REGION 88 -#define KVM_NR_VAR_MTRR 8 +#define KVM_NR_VAR_MTRR 10 #define ASYNC_PF_PER_VCPU 64 @@ -461,7 +461,7 @@ struct kvm_vcpu_arch { bool nmi_injected; /* Trying to inject an NMI this entry */ struct mtrr_state_type mtrr_state; - u32 pat; + u64 pat; unsigned switch_db_regs; unsigned long db[KVM_NR_DB_REGS]; diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 851bcdc..fd47218 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -52,10 +52,9 @@ * Compared to the generic __my_cpu_offset version, the following * saves one instruction and avoids clobbering a temp register. 
*/ -#define raw_cpu_ptr(ptr) \ +#define arch_raw_cpu_ptr(ptr) \ ({ \ unsigned long tcp_ptr__; \ - __verify_pcpu_ptr(ptr); \ asm volatile("add " __percpu_arg(1) ", %0" \ : "=r" (tcp_ptr__) \ : "m" (this_cpu_off), "0" (ptr)); \ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 14fd6fd..6205f0c 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -231,6 +231,22 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, #define ARCH_HAS_USER_SINGLE_STEP_INFO +/* + * When hitting ptrace_stop(), we cannot return using SYSRET because + * that does not restore the full CPU state, only a minimal set. The + * ptracer can change arbitrary register values, which is usually okay + * because the usual ptrace stops run off the signal delivery path which + * forces IRET; however, ptrace_event() stops happen in arbitrary places + * in the kernel and don't force IRET path. + * + * So force IRET path after a ptrace stop. + */ +#define arch_ptrace_stop_needed(code, info) \ +({ \ + set_thread_flag(TIF_NOTIFY_RESUME); \ + false; \ +}) + struct user_desc; extern int do_get_thread_area(struct task_struct *p, int idx, struct user_desc __user *info); diff --git a/arch/x86/include/asm/vga.h b/arch/x86/include/asm/vga.h index 44282fb..c4b9dc2 100644 --- a/arch/x86/include/asm/vga.h +++ b/arch/x86/include/asm/vga.h @@ -17,10 +17,4 @@ #define vga_readb(x) (*(x)) #define vga_writeb(x, y) (*(y) = (x)) -#ifdef CONFIG_FB_EFI -#define __ARCH_HAS_VGA_DEFAULT_DEVICE -extern struct pci_dev *vga_default_device(void); -extern void vga_set_default_device(struct pci_dev *pdev); -#endif - #endif /* _ASM_X86_VGA_H */ diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index f3a1f04..5848744 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -841,7 +841,6 @@ static int apm_do_idle(void) u32 eax; u8 ret = 0; int idled = 0; - int polling; int err = 0; if (!need_resched()) { diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index a800290..f9e4fdd 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -370,6 +370,17 @@ static void init_intel(struct cpuinfo_x86 *c) */ detect_extended_topology(c); + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ + c->x86_max_cores = intel_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + } + l2 = init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); @@ -438,17 +449,6 @@ static void init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_P3); #endif - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - c->x86_max_cores = intel_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - } - /* Work around errata */ srat_detect_node(c); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a952e9c..9c8f739 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -730,6 +730,18 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) #endif } +#ifdef CONFIG_X86_HT + /* + * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in + * turns means that the only possibility is SMT (as indicated in + * cpuid1). 
Since cpuid2 doesn't specify shared caches, and we know + * that SMT shares all caches, we can unconditionally set cpu_llc_id to + * c->phys_proc_id. + */ + if (per_cpu(cpu_llc_id, cpu) == BAD_APICID) + per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; +#endif + c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); return l2; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index bb92f38..9a79c8d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2451,6 +2451,12 @@ static __init int mcheck_init_device(void) for_each_online_cpu(i) { err = mce_device_create(i); if (err) { + /* + * Register notifier anyway (and do not unreg it) so + * that we don't leave undeleted timers, see notifier + * callback above. + */ + __register_hotcpu_notifier(&mce_cpu_notifier); cpu_notifier_register_done(); goto err_device_create; } @@ -2471,10 +2477,6 @@ static __init int mcheck_init_device(void) err_register: unregister_syscore_ops(&mce_syscore_ops); - cpu_notifier_register_begin(); - __unregister_hotcpu_notifier(&mce_cpu_notifier); - cpu_notifier_register_done(); - err_device_create: /* * We didn't keep track of which devices were created above, but diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2bdfbff..2879ecd 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) continue; if (event->attr.config1 & ~er->valid_mask) return -EINVAL; + /* Check if the extra msrs can be safely accessed*/ + if (!er->extra_msr_access) + return -ENXIO; reg->idx = er->idx; reg->config = event->attr.config1; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 3b2f9bd..8ade931 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -295,14 +295,16 @@ struct extra_reg { u64 config_mask; u64 valid_mask; int idx; /* per_xxx->regs[] reg index */ + bool extra_msr_access; }; #define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ - .event = (e), \ - .msr = (ms), \ - .config_mask = (m), \ - .valid_mask = (vm), \ - .idx = EXTRA_REG_##i, \ + .event = (e), \ + .msr = (ms), \ + .config_mask = (m), \ + .valid_mask = (vm), \ + .idx = EXTRA_REG_##i, \ + .extra_msr_access = true, \ } #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index adb02aa..2502d0d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1382,6 +1382,15 @@ again: intel_pmu_lbr_read(); /* + * CondChgd bit 63 doesn't mean any overflow status. Ignore + * and clear the bit. + */ + if (__test_and_clear_bit(63, (unsigned long *)&status)) { + if (!status) + goto done; + } + + /* * PEBS overflow sets bit 62 in the global status register */ if (__test_and_clear_bit(62, (unsigned long *)&status)) { @@ -2173,6 +2182,41 @@ static void intel_snb_check_microcode(void) } } +/* + * Under certain circumstances, access certain MSR may cause #GP. + * The function tests if the input MSR can be safely accessed. + */ +static bool check_msr(unsigned long msr, u64 mask) +{ + u64 val_old, val_new, val_tmp; + + /* + * Read the current value, change it and read it back to see if it + * matches, this is needed to detect certain hardware emulators + * (qemu/kvm) that don't trap on the MSR access and always return 0s. 
+ */ + if (rdmsrl_safe(msr, &val_old)) + return false; + + /* + * Only change the bits which can be updated by wrmsrl. + */ + val_tmp = val_old ^ mask; + if (wrmsrl_safe(msr, val_tmp) || + rdmsrl_safe(msr, &val_new)) + return false; + + if (val_new != val_tmp) + return false; + + /* Here it's sure that the MSR can be safely accessed. + * Restore the old value and return. + */ + wrmsrl(msr, val_old); + + return true; +} + static __init void intel_sandybridge_quirk(void) { x86_pmu.check_microcode = intel_snb_check_microcode; @@ -2262,7 +2306,8 @@ __init int intel_pmu_init(void) union cpuid10_ebx ebx; struct event_constraint *c; unsigned int unused; - int version; + struct extra_reg *er; + int version, i; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { @@ -2465,6 +2510,9 @@ __init int intel_pmu_init(void) case 62: /* IvyBridge EP */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + /* dTLB-load-misses on IVB is different than SNB */ + hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */ + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -2565,6 +2613,34 @@ __init int intel_pmu_init(void) } } + /* + * Access LBR MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support LBR MSR + * Check all LBT MSR here. + * Disable LBR access if any LBR MSRs can not be accessed. + */ + if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) + x86_pmu.lbr_nr = 0; + for (i = 0; i < x86_pmu.lbr_nr; i++) { + if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && + check_msr(x86_pmu.lbr_to + i, 0xffffUL))) + x86_pmu.lbr_nr = 0; + } + + /* + * Access extra MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support offcore event + * Check all extra_regs here. 
+ */ + if (x86_pmu.extra_regs) { + for (er = x86_pmu.extra_regs; er->msr; er++) { + er->extra_msr_access = check_msr(er->msr, 0x1ffUL); + /* Disable LBR select mapping */ + if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) + x86_pmu.lbr_sel_map = NULL; + } + } + /* Support full width counters using alternative MSR range */ if (x86_pmu.intel_cap.full_width_write) { x86_pmu.max_period = x86_pmu.cntval_mask; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 980970c..696ade3 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -311,9 +311,11 @@ static int alloc_bts_buffer(int cpu) if (!x86_pmu.bts) return 0; - buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL, node); - if (unlikely(!buffer)) + buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); + if (unlikely(!buffer)) { + WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); return -ENOMEM; + } max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; thresh = max / 16; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 65bbbea..ae6552a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -550,16 +550,16 @@ static struct extra_reg snbep_uncore_cbox_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6), SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8), SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa), SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8), SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa), SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2), @@ -1222,6 +1222,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, SNBEP_CBO_PMON_CTL_TID_EN, 0x1), SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc), SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc), @@ -1245,7 +1246,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10), SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10), SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10), SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18), SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18), diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index dbaa23e..0d0c9d4 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -425,8 +425,8 @@ sysenter_do_call: cmpl $(NR_syscalls), %eax jae sysenter_badsys call *sys_call_table(,%eax,4) - movl %eax,PT_EAX(%esp) sysenter_after_call: + movl %eax,PT_EAX(%esp) 
LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF @@ -502,6 +502,7 @@ ENTRY(system_call) jae syscall_badsys syscall_call: call *sys_call_table(,%eax,4) +syscall_after_call: movl %eax,PT_EAX(%esp) # store the return value syscall_exit: LOCKDEP_SYS_EXIT @@ -675,12 +676,12 @@ syscall_fault: END(syscall_fault) syscall_badsys: - movl $-ENOSYS,PT_EAX(%esp) - jmp syscall_exit + movl $-ENOSYS,%eax + jmp syscall_after_call END(syscall_badsys) sysenter_badsys: - movl $-ENOSYS,PT_EAX(%esp) + movl $-ENOSYS,%eax jmp sysenter_after_call END(syscall_badsys) CFI_ENDPROC diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b25ca96..c844f08 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -830,27 +830,24 @@ restore_args: RESTORE_ARGS 1,8,1 irq_return: + INTERRUPT_RETURN + +ENTRY(native_iret) /* * Are we returning to a stack segment from the LDT? Note: in * 64-bit mode SS:RSP on the exception stack is always valid. */ #ifdef CONFIG_X86_ESPFIX64 testb $4,(SS-RIP)(%rsp) - jnz irq_return_ldt + jnz native_irq_return_ldt #endif -irq_return_iret: - INTERRUPT_RETURN - _ASM_EXTABLE(irq_return_iret, bad_iret) - -#ifdef CONFIG_PARAVIRT -ENTRY(native_iret) +native_irq_return_iret: iretq - _ASM_EXTABLE(native_iret, bad_iret) -#endif + _ASM_EXTABLE(native_irq_return_iret, bad_iret) #ifdef CONFIG_X86_ESPFIX64 -irq_return_ldt: +native_irq_return_ldt: pushq_cfi %rax pushq_cfi %rdi SWAPGS @@ -872,7 +869,7 @@ irq_return_ldt: SWAPGS movq %rax,%rsp popq_cfi %rax - jmp irq_return_iret + jmp native_irq_return_iret #endif .section .fixup,"ax" @@ -956,13 +953,8 @@ __do_double_fault: cmpl $__KERNEL_CS,CS(%rdi) jne do_double_fault movq RIP(%rdi),%rax - cmpq $irq_return_iret,%rax -#ifdef CONFIG_PARAVIRT - je 1f - cmpq $native_iret,%rax -#endif + cmpq $native_irq_return_iret,%rax jne do_double_fault /* This shouldn't happen... 
*/ -1: movq PER_CPU_VAR(kernel_stack),%rax subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */ movq %rax,RSP(%rdi) @@ -1428,7 +1420,7 @@ error_sti: */ error_kernelspace: incl %ebx - leaq irq_return_iret(%rip),%rcx + leaq native_irq_return_iret(%rip),%rcx cmpq %rcx,RIP+8(%rsp) je error_swapgs movl %ecx,%eax /* zero extend */ diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 6afbb16..94d857f 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -175,7 +175,7 @@ void init_espfix_ap(void) if (!pud_present(pud)) { pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); - paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); + paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PUD_CLONES; n++) set_pud(&pud_p[n], pud); } @@ -185,7 +185,7 @@ void init_espfix_ap(void) if (!pmd_present(pmd)) { pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); - paravirt_alloc_pmd(&init_mm, __pa(pte_p) >> PAGE_SHIFT); + paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PMD_CLONES; n++) set_pmd(&pmd_p[n], pmd); } @@ -193,7 +193,6 @@ void init_espfix_ap(void) pte_p = pte_offset_kernel(&pmd, addr); stack_page = (void *)__get_free_page(GFP_KERNEL); pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); - paravirt_alloc_pte(&init_mm, __pa(stack_page) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PTE_CLONES; n++) set_pte(&pte_p[n*PTE_STRIDE], pte); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 7596df6..67e6d19e 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -574,6 +574,9 @@ int kprobe_int3_handler(struct pt_regs *regs) struct kprobe *p; struct kprobe_ctlblk *kcb; + if (user_mode_vm(regs)) + return 0; + addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); /* * We don't want to be preempted for the entire diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 3f08f34..a1da673 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq"); DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); -DEF_NATIVE(pv_cpu_ops, iret, "iretq"); DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); @@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_cpu_ops, iret); PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index 2a26819..80eab01 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -37,10 +37,12 @@ static void remove_e820_regions(struct resource *avail) void arch_remove_reservations(struct resource *avail) { - /* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */ + /* + * Trim out BIOS area (high 2MB) and E820 regions. We do not remove + * the low 1MB unconditionally, as this area is needed for some ISA + * cards requiring a memory range, e.g. the i82365 PCMCIA controller. 
+ */ if (avail->flags & IORESOURCE_MEM) { - if (avail->start < BIOS_END) - avail->start = BIOS_END; resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END); remove_e820_regions(avail); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 57e5ce1..ea03031 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -920,9 +920,9 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); if (!(freq->flags & CPUFREQ_CONST_LOOPS)) mark_tsc_unstable("cpufreq changes"); - } - set_cyc2ns_scale(tsc_khz, freq->cpu); + set_cyc2ns_scale(tsc_khz, freq->cpu); + } return 0; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ec8366c..b5e994a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1462,6 +1462,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, */ if (var->unusable) var->db = 0; + var->dpl = to_svm(vcpu)->vmcb->save.cpl; break; } } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f32a025..ef432f8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1898,7 +1898,7 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) break; gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; - if (kvm_write_guest(kvm, data, + if (kvm_write_guest(kvm, gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT, &tsc_ref, sizeof(tsc_ref))) return 1; mark_page_dirty(kvm, gfn); @@ -5887,6 +5887,18 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) kvm_x86_ops->set_nmi(vcpu); } } else if (kvm_cpu_has_injectable_intr(vcpu)) { + /* + * Because interrupts can be injected asynchronously, we are + * calling check_nested_events again here to avoid a race condition. + * See https://lkml.org/lkml/2014/7/2/60 for discussion about this + * proposal and current concerns. Perhaps we should be setting + * KVM_REQ_EVENT only on certain events and not unconditionally? + */ + if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { + r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); + if (r != 0) + return r; + } if (kvm_x86_ops->interrupt_allowed(vcpu)) { kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false); diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index b5e6026..c61ea57 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -326,6 +326,27 @@ static void pci_fixup_video(struct pci_dev *pdev) struct pci_bus *bus; u16 config; + if (!vga_default_device()) { + resource_size_t start, end; + int i; + + /* Does firmware framebuffer belong to us? */ + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) + continue; + + start = pci_resource_start(pdev, i); + end = pci_resource_end(pdev, i); + + if (!start || !end) + continue; + + if (screen_info.lfb_base >= start && + (screen_info.lfb_base + screen_info.lfb_size) < end) + vga_set_default_device(pdev); + } + } + /* Is VGA routed to us? 
*/ bus = pdev->bus; while (bus) { diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index a19ed92..2ae525e 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -162,6 +162,10 @@ pcibios_align_resource(void *data, const struct resource *res, return start; if (start & 0x300) start = (start + 0x3ff) & ~0x3ff; + } else if (res->flags & IORESOURCE_MEM) { + /* The low 1MB range is reserved for ISA cards */ + if (start < BIOS_END) + start = BIOS_END; } return start; } diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h index df95a2f..11b65d4 100644 --- a/arch/x86/vdso/vdso2c.h +++ b/arch/x86/vdso/vdso2c.h @@ -93,6 +93,9 @@ static void BITSFUNC(copy_section)(struct BITSFUNC(fake_sections) *out, uint64_t flags = GET_LE(&in->sh_flags); bool copy = flags & SHF_ALLOC && + (GET_LE(&in->sh_size) || + (GET_LE(&in->sh_type) != SHT_RELA && + GET_LE(&in->sh_type) != SHT_REL)) && strcmp(name, ".altinstructions") && strcmp(name, ".altinstr_replacement"); diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index e1513c4..5a5176d 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -62,6 +62,9 @@ struct linux_binprm; Only used for the 64-bit and x32 vdsos. */ static unsigned long vdso_addr(unsigned long start, unsigned len) { +#ifdef CONFIG_X86_32 + return 0; +#else unsigned long addr, end; unsigned offset; end = (start + PMD_SIZE - 1) & PMD_MASK; @@ -83,6 +86,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) addr = align_vdso_addr(addr); return addr; +#endif } static int map_vdso(const struct vdso_image *image, bool calculate_addr) diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index c985835..ebfa9b2 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -36,99 +36,133 @@ #include <linux/sched.h> #include <linux/mm.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include <xen/interface/xen.h> #include <xen/page.h> #include <xen/grant_table.h> +#include <xen/xen.h> #include <asm/pgtable.h> -static int map_pte_fn(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) +static struct gnttab_vm_area { + struct vm_struct *area; + pte_t **ptes; +} gnttab_shared_vm_area, gnttab_status_vm_area; + +int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, + unsigned long max_nr_gframes, + void **__shared) { - unsigned long **frames = (unsigned long **)data; + void *shared = *__shared; + unsigned long addr; + unsigned long i; - set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); - (*frames)++; - return 0; -} + if (shared == NULL) + *__shared = shared = gnttab_shared_vm_area.area->addr; -/* - * This function is used to map shared frames to store grant status. It is - * different from map_pte_fn above, the frames type here is uint64_t. 
- */ -static int map_pte_fn_status(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) -{ - uint64_t **frames = (uint64_t **)data; + addr = (unsigned long)shared; + + for (i = 0; i < nr_gframes; i++) { + set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i], + mfn_pte(frames[i], PAGE_KERNEL)); + addr += PAGE_SIZE; + } - set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); - (*frames)++; return 0; } -static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) +int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, + unsigned long max_nr_gframes, + grant_status_t **__shared) { + grant_status_t *shared = *__shared; + unsigned long addr; + unsigned long i; + + if (shared == NULL) + *__shared = shared = gnttab_status_vm_area.area->addr; + + addr = (unsigned long)shared; + + for (i = 0; i < nr_gframes; i++) { + set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i], + mfn_pte(frames[i], PAGE_KERNEL)); + addr += PAGE_SIZE; + } - set_pte_at(&init_mm, addr, pte, __pte(0)); return 0; } -int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, - unsigned long max_nr_gframes, - void **__shared) +void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) { - int rc; - void *shared = *__shared; + pte_t **ptes; + unsigned long addr; + unsigned long i; - if (shared == NULL) { - struct vm_struct *area = - alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); - BUG_ON(area == NULL); - shared = area->addr; - *__shared = shared; - } + if (shared == gnttab_status_vm_area.area->addr) + ptes = gnttab_status_vm_area.ptes; + else + ptes = gnttab_shared_vm_area.ptes; - rc = apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, - map_pte_fn, &frames); - return rc; + addr = (unsigned long)shared; + + for (i = 0; i < nr_gframes; i++) { + set_pte_at(&init_mm, addr, ptes[i], __pte(0)); + addr += PAGE_SIZE; + } } -int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, - unsigned long max_nr_gframes, - grant_status_t **__shared) +static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames) { - int rc; - grant_status_t *shared = *__shared; + area->ptes = kmalloc(sizeof(pte_t *) * nr_frames, GFP_KERNEL); + if (area->ptes == NULL) + return -ENOMEM; - if (shared == NULL) { - /* No need to pass in PTE as we are going to do it - * in apply_to_page_range anyhow. */ - struct vm_struct *area = - alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); - BUG_ON(area == NULL); - shared = area->addr; - *__shared = shared; + area->area = alloc_vm_area(PAGE_SIZE * nr_frames, area->ptes); + if (area->area == NULL) { + kfree(area->ptes); + return -ENOMEM; } - rc = apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, - map_pte_fn_status, &frames); - return rc; + return 0; } -void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) +static void arch_gnttab_vfree(struct gnttab_vm_area *area) +{ + free_vm_area(area->area); + kfree(area->ptes); +} + +int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status) { - apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); + int ret; + + if (!xen_pv_domain()) + return 0; + + ret = arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared); + if (ret < 0) + return ret; + + /* + * Always allocate the space for the status frames in case + * we're migrated to a host with V2 support. 
+ */ + ret = arch_gnttab_valloc(&gnttab_status_vm_area, nr_status); + if (ret < 0) + goto err; + + return 0; + err: + arch_gnttab_vfree(&gnttab_shared_vm_area); + return -ENOMEM; } + #ifdef CONFIG_XEN_PVH #include <xen/balloon.h> #include <xen/events.h> -#include <xen/xen.h> #include <linux/slab.h> static int __init xlated_setup_gnttab_pages(void) { diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S index f9e1ec3..8453e6e 100644 --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -376,38 +376,42 @@ _DoubleExceptionVector_WindowOverflow: beqz a2, 1f # if at start of vector, don't restore addi a0, a0, -128 - bbsi a0, 8, 1f # don't restore except for overflow 8 and 12 - bbsi a0, 7, 2f + bbsi.l a0, 8, 1f # don't restore except for overflow 8 and 12 + + /* + * This fixup handler is for the extremely unlikely case where the + * overflow handler's reference thru a0 gets a hardware TLB refill + * that bumps out the (distinct, aliasing) TLB entry that mapped its + * prior references thru a9/a13, and where our reference now thru + * a9/a13 gets a 2nd-level miss exception (not hardware TLB refill). + */ + movi a2, window_overflow_restore_a0_fixup + s32i a2, a3, EXC_TABLE_FIXUP + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + + bbsi.l a0, 7, 2f /* * Restore a0 as saved by _WindowOverflow8(). - * - * FIXME: we really need a fixup handler for this L32E, - * for the extremely unlikely case where the overflow handler's - * reference thru a0 gets a hardware TLB refill that bumps out - * the (distinct, aliasing) TLB entry that mapped its prior - * references thru a9, and where our reference now thru a9 - * gets a 2nd-level miss exception (not hardware TLB refill). */ - l32e a2, a9, -16 - wsr a2, depc # replace the saved a0 - j 1f + l32e a0, a9, -16 + wsr a0, depc # replace the saved a0 + j 3f 2: /* * Restore a0 as saved by _WindowOverflow12(). - * - * FIXME: we really need a fixup handler for this L32E, - * for the extremely unlikely case where the overflow handler's - * reference thru a0 gets a hardware TLB refill that bumps out - * the (distinct, aliasing) TLB entry that mapped its prior - * references thru a13, and where our reference now thru a13 - * gets a 2nd-level miss exception (not hardware TLB refill). */ - l32e a2, a13, -16 - wsr a2, depc # replace the saved a0 + l32e a0, a13, -16 + wsr a0, depc # replace the saved a0 +3: + xsr a3, excsave1 + movi a0, 0 + s32i a0, a3, EXC_TABLE_FIXUP + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE 1: /* * Restore WindowBase while leaving all address registers restored. @@ -449,6 +453,7 @@ _DoubleExceptionVector_WindowOverflow: s32i a0, a2, PT_DEPC +_DoubleExceptionVector_handle_exception: addx4 a0, a0, a3 l32i a0, a0, EXC_TABLE_FAST_USER xsr a3, excsave1 @@ -464,11 +469,120 @@ _DoubleExceptionVector_WindowOverflow: rotw -3 j 1b - .end literal_prefix ENDPROC(_DoubleExceptionVector) /* + * Fixup handler for TLB miss in double exception handler for window owerflow. + * We get here with windowbase set to the window that was being spilled and + * a0 trashed. a0 bit 7 determines if this is a call8 (bit clear) or call12 + * (bit set) window. 
+ * + * We do the following here: + * - go to the original window retaining a0 value; + * - set up exception stack to return back to appropriate a0 restore code + * (we'll need to rotate window back and there's no place to save this + * information, use different return address for that); + * - handle the exception; + * - go to the window that was being spilled; + * - set up window_overflow_restore_a0_fixup as a fixup routine; + * - reload a0; + * - restore the original window; + * - reset the default fixup routine; + * - return to user. By the time we get to this fixup handler all information + * about the conditions of the original double exception that happened in + * the window overflow handler is lost, so we just return to userspace to + * retry overflow from start. + * + * a0: value of depc, original value in depc + * a2: trashed, original value in EXC_TABLE_DOUBLE_SAVE + * a3: exctable, original value in excsave1 + */ + +ENTRY(window_overflow_restore_a0_fixup) + + rsr a0, ps + extui a0, a0, PS_OWB_SHIFT, PS_OWB_WIDTH + rsr a2, windowbase + sub a0, a2, a0 + extui a0, a0, 0, 3 + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + + _beqi a0, 1, .Lhandle_1 + _beqi a0, 3, .Lhandle_3 + + .macro overflow_fixup_handle_exception_pane n + + rsr a0, depc + rotw -\n + + xsr a3, excsave1 + wsr a2, depc + l32i a2, a3, EXC_TABLE_KSTK + s32i a0, a2, PT_AREG0 + + movi a0, .Lrestore_\n + s32i a0, a2, PT_DEPC + rsr a0, exccause + j _DoubleExceptionVector_handle_exception + + .endm + + overflow_fixup_handle_exception_pane 2 +.Lhandle_1: + overflow_fixup_handle_exception_pane 1 +.Lhandle_3: + overflow_fixup_handle_exception_pane 3 + + .macro overflow_fixup_restore_a0_pane n + + rotw \n + /* Need to preserve a0 value here to be able to handle exception + * that may occur on a0 reload from stack. It may occur because + * TLB miss handler may not be atomic and pointer to page table + * may be lost before we get here. There are no free registers, + * so we need to use EXC_TABLE_DOUBLE_SAVE area. + */ + xsr a3, excsave1 + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE + movi a2, window_overflow_restore_a0_fixup + s32i a2, a3, EXC_TABLE_FIXUP + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + bbsi.l a0, 7, 1f + l32e a0, a9, -16 + j 2f +1: + l32e a0, a13, -16 +2: + rotw -\n + + .endm + +.Lrestore_2: + overflow_fixup_restore_a0_pane 2 + +.Lset_default_fixup: + xsr a3, excsave1 + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE + movi a2, 0 + s32i a2, a3, EXC_TABLE_FIXUP + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + rfe + +.Lrestore_1: + overflow_fixup_restore_a0_pane 1 + j .Lset_default_fixup +.Lrestore_3: + overflow_fixup_restore_a0_pane 3 + j .Lset_default_fixup + +ENDPROC(window_overflow_restore_a0_fixup) + + .end literal_prefix +/* * Debug interrupt vector * * There is not much space here, so simply jump to another handler. diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index ee32c00..d16db6d 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -269,13 +269,13 @@ SECTIONS .UserExceptionVector.literal) SECTION_VECTOR (_DoubleExceptionVector_literal, .DoubleExceptionVector.literal, - DOUBLEEXC_VECTOR_VADDR - 16, + DOUBLEEXC_VECTOR_VADDR - 40, SIZEOF(.UserExceptionVector.text), .UserExceptionVector.text) SECTION_VECTOR (_DoubleExceptionVector_text, .DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR, - 32, + 40, .DoubleExceptionVector.literal) . 
= (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3; diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 4224256..77ed202 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -191,7 +191,7 @@ int __init mem_reserve(unsigned long start, unsigned long end, int must_exist) return -EINVAL; } - if (it && start - it->start < bank_sz) { + if (it && start - it->start <= bank_sz) { if (start == it->start) { if (end - it->start < bank_sz) { it->start = end; |
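
Note on the grant-table hunk above: the patch drops the apply_to_page_range() walks and instead captures the PTE pointers once, when arch_gnttab_valloc() calls alloc_vm_area(PAGE_SIZE * nr_frames, area->ptes); mapping or unmapping the shared/status frames later is then just a set_pte_at() loop over the saved entries, with no page-table walk at map time. The following is only a minimal userspace analogue of that "allocate and remember the slots up front, write through them later" pattern, not kernel code; all names (slot_table, slot_table_init, slot_table_set) are made up for illustration.

/* Illustrative analogue only -- hypothetical names, not kernel APIs. */
#include <stdio.h>
#include <stdlib.h>

struct slot_table {
	long *backing;   /* stands in for the vmalloc'd VM area       */
	long **slots;    /* stands in for the saved pte_t * pointers  */
	size_t nr;
};

/* "init": do every allocation up front, like arch_gnttab_valloc(). */
static int slot_table_init(struct slot_table *t, size_t nr)
{
	t->backing = calloc(nr, sizeof(*t->backing));
	t->slots = malloc(nr * sizeof(*t->slots));
	if (!t->backing || !t->slots) {
		free(t->backing);
		free(t->slots);
		return -1;
	}
	for (size_t i = 0; i < nr; i++)
		t->slots[i] = &t->backing[i];   /* remember each slot once */
	t->nr = nr;
	return 0;
}

/* "map": only writes through the saved pointers, never allocates,
 * mirroring the set_pte_at() loops in arch_gnttab_map_shared/status(). */
static void slot_table_set(struct slot_table *t, const long *frames, size_t n)
{
	for (size_t i = 0; i < n && i < t->nr; i++)
		*t->slots[i] = frames[i];
}

int main(void)
{
	struct slot_table t;
	long frames[4] = { 100, 101, 102, 103 };

	if (slot_table_init(&t, 4))
		return 1;
	slot_table_set(&t, frames, 4);
	printf("slot 2 -> %ld\n", *t.slots[2]);
	free(t.backing);
	free(t.slots);
	return 0;
}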
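
Note on the final mem_reserve() hunk: the test is widened from < to <=, so a reserved range whose start sits exactly at the end of an existing bank (start - it->start == bank_sz) is now also taken through the bank-adjustment path instead of being skipped. The snippet below only demonstrates that boundary difference between the two predicates; the addresses and sizes are made-up values, not anything from the patch.

/* Boundary illustration only; numbers are made up. */
#include <stdio.h>

int main(void)
{
	unsigned long bank_start = 0x1000, bank_sz = 0x2000;  /* bank covers [0x1000, 0x3000) */
	unsigned long start = 0x3000;                         /* reservation begins right at bank end */
	unsigned long off = start - bank_start;               /* == bank_sz */

	printf("off <  bank_sz: %d\n", off <  bank_sz);   /* 0: old test does not match this bank */
	printf("off <= bank_sz: %d\n", off <= bank_sz);   /* 1: new test still handles it         */
	return 0;
}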