diff options
Diffstat (limited to 'arch')
83 files changed, 1846 insertions, 358 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index de7049b..531cdda 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -175,6 +175,9 @@ config ARCH_HAS_CPUFREQ and that the relevant menu configurations are displayed for it. +config ARCH_HAS_BANDGAP + bool + config GENERIC_HWEIGHT bool default y @@ -1450,7 +1453,7 @@ config SMP depends on CPU_V6K || CPU_V7 depends on GENERIC_CLOCKEVENTS depends on HAVE_SMP - depends on MMU + depends on MMU || ARM_MPU select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one CPU. If you have @@ -1471,7 +1474,7 @@ config SMP config SMP_ON_UP bool "Allow booting SMP kernel on uniprocessor systems (EXPERIMENTAL)" - depends on SMP && !XIP_KERNEL + depends on SMP && !XIP_KERNEL && MMU default y help SMP kernels contain instructions which fail on non-SMP processors. @@ -1744,6 +1747,14 @@ config HW_PERF_EVENTS Enable hardware performance counter support for perf events. If disabled, perf events will use software events only. +config SYS_SUPPORTS_HUGETLBFS + def_bool y + depends on ARM_LPAE + +config HAVE_ARCH_TRANSPARENT_HUGEPAGE + def_bool y + depends on ARM_LPAE + source "mm/Kconfig" config FORCE_MAX_ZONEORDER diff --git a/arch/arm/Kconfig-nommu b/arch/arm/Kconfig-nommu index c859495..aed66d5 100644 --- a/arch/arm/Kconfig-nommu +++ b/arch/arm/Kconfig-nommu @@ -50,3 +50,15 @@ config REMAP_VECTORS_TO_RAM Otherwise, say 'y' here. In this case, the kernel will require external support to redirect the hardware exception vectors to the writable versions located at DRAM_BASE. + +config ARM_MPU + bool 'Use the ARM v7 PMSA Compliant MPU' + depends on CPU_V7 + default y + help + Some ARM systems without an MMU have instead a Memory Protection + Unit (MPU) that defines the type and permissions for regions of + memory. + + If your CPU has an MPU then you should choose 'y' here unless you + know that you do not want to use the MPU. diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 77c1411..5b7be8d 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -550,6 +550,13 @@ choice of the tiles using the RS1 memory map, including all new A-class core tiles, FPGA-based SMMs and software models. + config DEBUG_VEXPRESS_UART0_CRX + bool "Use PL011 UART0 at 0xb0090000 (Cortex-R compliant tiles)" + depends on ARCH_VEXPRESS && !MMU + help + This option selects UART0 at 0xb0090000. This is appropriate for + Cortex-R series tiles and SMMs, such as Cortex-R5 and Cortex-R7 + config DEBUG_VT8500_UART0 bool "Use UART0 on VIA/Wondermedia SoCs" depends on ARCH_VT8500 @@ -789,7 +796,8 @@ config DEBUG_LL_INCLUDE default "debug/u300.S" if DEBUG_U300_UART default "debug/ux500.S" if DEBUG_UX500_UART default "debug/vexpress.S" if DEBUG_VEXPRESS_UART0_DETECT || \ - DEBUG_VEXPRESS_UART0_CA9 || DEBUG_VEXPRESS_UART0_RS1 + DEBUG_VEXPRESS_UART0_CA9 || DEBUG_VEXPRESS_UART0_RS1 || \ + DEBUG_VEXPRESS_UART0_CRX default "debug/vt8500.S" if DEBUG_VT8500_UART0 default "debug/zynq.S" if DEBUG_ZYNQ_UART0 || DEBUG_ZYNQ_UART1 default "mach/debug-macro.S" diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 72caf82..c0ac0f5 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -59,38 +59,44 @@ comma = , # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. -arch-$(CONFIG_CPU_32v7M) :=-D__LINUX_ARM_ARCH__=7 -march=armv7-m -Wa,-march=armv7-m -arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a) -arch-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) +arch-$(CONFIG_CPU_32v7M) =-D__LINUX_ARM_ARCH__=7 -march=armv7-m -Wa,-march=armv7-m +arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a) +arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) # Only override the compiler option if ARMv6. The ARMv6K extensions are # always available in ARMv7 ifeq ($(CONFIG_CPU_32v6),y) -arch-$(CONFIG_CPU_32v6K) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k) +arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k) endif -arch-$(CONFIG_CPU_32v5) :=-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t) -arch-$(CONFIG_CPU_32v4T) :=-D__LINUX_ARM_ARCH__=4 -march=armv4t -arch-$(CONFIG_CPU_32v4) :=-D__LINUX_ARM_ARCH__=4 -march=armv4 -arch-$(CONFIG_CPU_32v3) :=-D__LINUX_ARM_ARCH__=3 -march=armv3 +arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t) +arch-$(CONFIG_CPU_32v4T) =-D__LINUX_ARM_ARCH__=4 -march=armv4t +arch-$(CONFIG_CPU_32v4) =-D__LINUX_ARM_ARCH__=4 -march=armv4 +arch-$(CONFIG_CPU_32v3) =-D__LINUX_ARM_ARCH__=3 -march=armv3 + +# Evaluate arch cc-option calls now +arch-y := $(arch-y) # This selects how we optimise for the processor. -tune-$(CONFIG_CPU_ARM7TDMI) :=-mtune=arm7tdmi -tune-$(CONFIG_CPU_ARM720T) :=-mtune=arm7tdmi -tune-$(CONFIG_CPU_ARM740T) :=-mtune=arm7tdmi -tune-$(CONFIG_CPU_ARM9TDMI) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM940T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM946E) :=$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi) -tune-$(CONFIG_CPU_ARM920T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM922T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM925T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM926T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_FA526) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_SA110) :=-mtune=strongarm110 -tune-$(CONFIG_CPU_SA1100) :=-mtune=strongarm1100 -tune-$(CONFIG_CPU_XSCALE) :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale -tune-$(CONFIG_CPU_XSC3) :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale -tune-$(CONFIG_CPU_FEROCEON) :=$(call cc-option,-mtune=marvell-f,-mtune=xscale) -tune-$(CONFIG_CPU_V6) :=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) -tune-$(CONFIG_CPU_V6K) :=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) +tune-$(CONFIG_CPU_ARM7TDMI) =-mtune=arm7tdmi +tune-$(CONFIG_CPU_ARM720T) =-mtune=arm7tdmi +tune-$(CONFIG_CPU_ARM740T) =-mtune=arm7tdmi +tune-$(CONFIG_CPU_ARM9TDMI) =-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM940T) =-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM946E) =$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi) +tune-$(CONFIG_CPU_ARM920T) =-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM922T) =-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM925T) =-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM926T) =-mtune=arm9tdmi +tune-$(CONFIG_CPU_FA526) =-mtune=arm9tdmi +tune-$(CONFIG_CPU_SA110) =-mtune=strongarm110 +tune-$(CONFIG_CPU_SA1100) =-mtune=strongarm1100 +tune-$(CONFIG_CPU_XSCALE) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale +tune-$(CONFIG_CPU_XSC3) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale +tune-$(CONFIG_CPU_FEROCEON) =$(call cc-option,-mtune=marvell-f,-mtune=xscale) +tune-$(CONFIG_CPU_V6) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) +tune-$(CONFIG_CPU_V6K) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) + +# Evaluate tune cc-option calls now +tune-y := $(tune-y) ifeq ($(CONFIG_AEABI),y) CFLAGS_ABI :=-mabi=aapcs-linux -mno-thumb-interwork @@ -295,9 +301,10 @@ zImage Image xipImage bootpImage uImage: vmlinux zinstall uinstall install: vmlinux $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@ -%.dtb: scripts +%.dtb: | scripts $(Q)$(MAKE) $(build)=$(boot)/dts MACHINE=$(MACHINE) $(boot)/dts/$@ +PHONY += dtbs dtbs: scripts $(Q)$(MAKE) $(build)=$(boot)/dts MACHINE=$(MACHINE) dtbs diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c index aabc02a..d1153c8 100644 --- a/arch/arm/boot/compressed/atags_to_fdt.c +++ b/arch/arm/boot/compressed/atags_to_fdt.c @@ -53,6 +53,17 @@ static const void *getprop(const void *fdt, const char *node_path, return fdt_getprop(fdt, offset, property, len); } +static uint32_t get_cell_size(const void *fdt) +{ + int len; + uint32_t cell_size = 1; + const uint32_t *size_len = getprop(fdt, "/", "#size-cells", &len); + + if (size_len) + cell_size = fdt32_to_cpu(*size_len); + return cell_size; +} + static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline) { char cmdline[COMMAND_LINE_SIZE]; @@ -95,9 +106,11 @@ static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline) int atags_to_fdt(void *atag_list, void *fdt, int total_space) { struct tag *atag = atag_list; - uint32_t mem_reg_property[2 * NR_BANKS]; + /* In the case of 64 bits memory size, need to reserve 2 cells for + * address and size for each bank */ + uint32_t mem_reg_property[2 * 2 * NR_BANKS]; int memcount = 0; - int ret; + int ret, memsize; /* make sure we've got an aligned pointer */ if ((u32)atag_list & 0x3) @@ -137,8 +150,25 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space) continue; if (!atag->u.mem.size) continue; - mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.start); - mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.size); + memsize = get_cell_size(fdt); + + if (memsize == 2) { + /* if memsize is 2, that means that + * each data needs 2 cells of 32 bits, + * so the data are 64 bits */ + uint64_t *mem_reg_prop64 = + (uint64_t *)mem_reg_property; + mem_reg_prop64[memcount++] = + cpu_to_fdt64(atag->u.mem.start); + mem_reg_prop64[memcount++] = + cpu_to_fdt64(atag->u.mem.size); + } else { + mem_reg_property[memcount++] = + cpu_to_fdt32(atag->u.mem.start); + mem_reg_property[memcount++] = + cpu_to_fdt32(atag->u.mem.size); + } + } else if (atag->hdr.tag == ATAG_INITRD2) { uint32_t initrd_start, initrd_size; initrd_start = atag->u.initrd.start; @@ -150,8 +180,10 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space) } } - if (memcount) - setprop(fdt, "/memory", "reg", mem_reg_property, 4*memcount); + if (memcount) { + setprop(fdt, "/memory", "reg", mem_reg_property, + 4 * memcount * memsize); + } return fdt_pack(fdt); } diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 032a8d9..75189f1 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -142,7 +142,6 @@ start: mov r7, r1 @ save architecture ID mov r8, r2 @ save atags pointer -#ifndef __ARM_ARCH_2__ /* * Booting from Angel - need to enter SVC mode and disable * FIQs/IRQs (numeric definitions from angel arm.h source). @@ -158,10 +157,6 @@ not_angel: safe_svcmode_maskall r0 msr spsr_cxsf, r9 @ Save the CPU boot mode in @ SPSR -#else - teqp pc, #0x0c000003 @ turn off interrupts -#endif - /* * Note that some cache flushing and other stuff may * be needed here - is there an Angel SWI call for this? @@ -183,7 +178,19 @@ not_angel: ldr r4, =zreladdr #endif - bl cache_on + /* + * Set up a page table only if it won't overwrite ourself. + * That means r4 < pc && r4 - 16k page directory > &_end. + * Given that r4 > &_end is most unfrequent, we add a rough + * additional 1MB of room for a possible appended DTB. + */ + mov r0, pc + cmp r0, r4 + ldrcc r0, LC0+32 + addcc r0, r0, pc + cmpcc r4, r0 + orrcc r4, r4, #1 @ remember we skipped cache_on + blcs cache_on restart: adr r0, LC0 ldmia r0, {r1, r2, r3, r6, r10, r11, r12} @@ -229,7 +236,7 @@ restart: adr r0, LC0 * r0 = delta * r2 = BSS start * r3 = BSS end - * r4 = final kernel address + * r4 = final kernel address (possibly with LSB set) * r5 = appended dtb size (still unknown) * r6 = _edata * r7 = architecture ID @@ -277,6 +284,7 @@ restart: adr r0, LC0 */ cmp r0, #1 sub r0, r4, #TEXT_OFFSET + bic r0, r0, #1 add r0, r0, #0x100 mov r1, r6 sub r2, sp, r6 @@ -323,12 +331,13 @@ dtb_check_done: /* * Check to see if we will overwrite ourselves. - * r4 = final kernel address + * r4 = final kernel address (possibly with LSB set) * r9 = size of decompressed image * r10 = end of this image, including bss/stack/malloc space if non XIP * We basically want: * r4 - 16k page directory >= r10 -> OK * r4 + image length <= address of wont_overwrite -> OK + * Note: the possible LSB in r4 is harmless here. */ add r10, r10, #16384 cmp r4, r10 @@ -390,7 +399,8 @@ dtb_check_done: add sp, sp, r6 #endif - bl cache_clean_flush + tst r4, #1 + bleq cache_clean_flush adr r0, BSYM(restart) add r0, r0, r6 @@ -402,7 +412,7 @@ wont_overwrite: * r0 = delta * r2 = BSS start * r3 = BSS end - * r4 = kernel execution address + * r4 = kernel execution address (possibly with LSB set) * r5 = appended dtb size (0 if not present) * r7 = architecture ID * r8 = atags pointer @@ -465,6 +475,15 @@ not_relocated: mov r0, #0 cmp r2, r3 blo 1b + /* + * Did we skip the cache setup earlier? + * That is indicated by the LSB in r4. + * Do it now if so. + */ + tst r4, #1 + bic r4, r4, #1 + blne cache_on + /* * The C runtime environment should now be setup sufficiently. * Set up some pointers, and start decompressing. @@ -513,6 +532,7 @@ LC0: .word LC0 @ r1 .word _got_start @ r11 .word _got_end @ ip .word .L_user_stack_end @ sp + .word _end - restart + 16384 + 1024*1024 .size LC0, . - LC0 #ifdef CONFIG_ARCH_RPC diff --git a/arch/arm/boot/dts/bcm11351.dtsi b/arch/arm/boot/dts/bcm11351.dtsi index 17979d5..c0cdf66 100644 --- a/arch/arm/boot/dts/bcm11351.dtsi +++ b/arch/arm/boot/dts/bcm11351.dtsi @@ -50,10 +50,10 @@ }; L2: l2-cache { - compatible = "arm,pl310-cache"; - reg = <0x3ff20000 0x1000>; - cache-unified; - cache-level = <2>; + compatible = "bcm,bcm11351-a2-pl310-cache"; + reg = <0x3ff20000 0x1000>; + cache-unified; + cache-level = <2>; }; timer@35006000 { diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S index 8178705..80f033614 100644 --- a/arch/arm/common/mcpm_head.S +++ b/arch/arm/common/mcpm_head.S @@ -32,11 +32,11 @@ 1901: adr r0, 1902b bl printascii mov r0, r9 - bl printhex8 + bl printhex2 adr r0, 1903b bl printascii mov r0, r10 - bl printhex8 + bl printhex2 adr r0, 1904b bl printascii #endif diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c index 3caed0d..510e5b1 100644 --- a/arch/arm/common/mcpm_platsmp.c +++ b/arch/arm/common/mcpm_platsmp.c @@ -19,10 +19,6 @@ #include <asm/smp.h> #include <asm/smp_plat.h> -static void __init simple_smp_init_cpus(void) -{ -} - static int __cpuinit mcpm_boot_secondary(unsigned int cpu, struct task_struct *idle) { unsigned int mpidr, pcpu, pcluster, ret; @@ -74,7 +70,6 @@ static void mcpm_cpu_die(unsigned int cpu) #endif static struct smp_operations __initdata mcpm_smp_ops = { - .smp_init_cpus = simple_smp_init_cpus, .smp_boot_secondary = mcpm_boot_secondary, .smp_secondary_init = mcpm_secondary_init, #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 7c1bfc0..accefe0 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -80,15 +80,6 @@ static inline u32 arch_timer_get_cntfrq(void) return val; } -static inline u64 arch_counter_get_cntpct(void) -{ - u64 cval; - - isb(); - asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval)); - return cval; -} - static inline u64 arch_counter_get_cntvct(void) { u64 cval; diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h index cedd372..6493802 100644 --- a/arch/arm/include/asm/cp15.h +++ b/arch/arm/include/asm/cp15.h @@ -23,6 +23,11 @@ #define CR_RR (1 << 14) /* Round Robin cache replacement */ #define CR_L4 (1 << 15) /* LDR pc can set T bit */ #define CR_DT (1 << 16) +#ifdef CONFIG_MMU +#define CR_HA (1 << 17) /* Hardware management of Access Flag */ +#else +#define CR_BR (1 << 17) /* MPU Background region enable (PMSA) */ +#endif #define CR_IT (1 << 18) #define CR_ST (1 << 19) #define CR_FI (1 << 21) /* Fast interrupt (lower latency mode) */ diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index d7deb62..8c25dc4 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -8,6 +8,7 @@ #define CPUID_CACHETYPE 1 #define CPUID_TCM 2 #define CPUID_TLBTYPE 3 +#define CPUID_MPUIR 4 #define CPUID_MPIDR 5 #ifdef CONFIG_CPU_V7M diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h index fe92ccf..191ada6 100644 --- a/arch/arm/include/asm/div64.h +++ b/arch/arm/include/asm/div64.h @@ -46,7 +46,7 @@ __rem; \ }) -#if __GNUC__ < 4 +#if __GNUC__ < 4 || !defined(CONFIG_AEABI) /* * gcc versions earlier than 4.0 are simply too problematic for the diff --git a/arch/arm/include/asm/glue-proc.h b/arch/arm/include/asm/glue-proc.h index e6168c0..74a8b84 100644 --- a/arch/arm/include/asm/glue-proc.h +++ b/arch/arm/include/asm/glue-proc.h @@ -230,21 +230,21 @@ # endif #endif -#ifdef CONFIG_CPU_PJ4B +#ifdef CONFIG_CPU_V7M # ifdef CPU_NAME # undef MULTI_CPU # define MULTI_CPU # else -# define CPU_NAME cpu_pj4b +# define CPU_NAME cpu_v7m # endif #endif -#ifdef CONFIG_CPU_V7M +#ifdef CONFIG_CPU_PJ4B # ifdef CPU_NAME # undef MULTI_CPU # define MULTI_CPU # else -# define CPU_NAME cpu_v7m +# define CPU_NAME cpu_pj4b # endif #endif diff --git a/arch/arm/include/asm/hugetlb-3level.h b/arch/arm/include/asm/hugetlb-3level.h new file mode 100644 index 0000000..d4014fb --- /dev/null +++ b/arch/arm/include/asm/hugetlb-3level.h @@ -0,0 +1,71 @@ +/* + * arch/arm/include/asm/hugetlb-3level.h + * + * Copyright (C) 2012 ARM Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_ARM_HUGETLB_3LEVEL_H +#define _ASM_ARM_HUGETLB_3LEVEL_H + + +/* + * If our huge pte is non-zero then mark the valid bit. + * This allows pte_present(huge_ptep_get(ptep)) to return true for non-zero + * ptes. + * (The valid bit is automatically cleared by set_pte_at for PROT_NONE ptes). + */ +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + pte_t retval = *ptep; + if (pte_val(retval)) + pte_val(retval) |= L_PTE_VALID; + return retval; +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + ptep_clear_flush(vma, addr, ptep); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +#endif /* _ASM_ARM_HUGETLB_3LEVEL_H */ diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h new file mode 100644 index 0000000..1f1b1cd --- /dev/null +++ b/arch/arm/include/asm/hugetlb.h @@ -0,0 +1,84 @@ +/* + * arch/arm/include/asm/hugetlb.h + * + * Copyright (C) 2012 ARM Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_ARM_HUGETLB_H +#define _ASM_ARM_HUGETLB_H + +#include <asm/page.h> +#include <asm-generic/hugetlb.h> + +#include <asm/hugetlb-3level.h> + +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, unsigned long len) +{ + return 0; +} + +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (addr & ~huge_page_mask(h)) + return -EINVAL; + return 0; +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_dcache_clean, &page->flags); +} + +#endif /* _ASM_ARM_HUGETLB_H */ diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 652b560..d070741 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -130,16 +130,16 @@ static inline u32 __raw_readl(const volatile void __iomem *addr) */ extern void __iomem *__arm_ioremap_pfn_caller(unsigned long, unsigned long, size_t, unsigned int, void *); -extern void __iomem *__arm_ioremap_caller(unsigned long, size_t, unsigned int, +extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int, void *); extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int); -extern void __iomem *__arm_ioremap(unsigned long, size_t, unsigned int); -extern void __iomem *__arm_ioremap_exec(unsigned long, size_t, bool cached); +extern void __iomem *__arm_ioremap(phys_addr_t, size_t, unsigned int); +extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached); extern void __iounmap(volatile void __iomem *addr); extern void __arm_iounmap(volatile void __iomem *addr); -extern void __iomem * (*arch_ioremap_caller)(unsigned long, size_t, +extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); extern void (*arch_iounmap)(volatile void __iomem *); diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 57870ab..584786f 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -18,6 +18,8 @@ #include <linux/types.h> #include <linux/sizes.h> +#include <asm/cache.h> + #ifdef CONFIG_NEED_MACH_MEMORY_H #include <mach/memory.h> #endif @@ -141,6 +143,20 @@ #define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) #define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys))) +/* + * Minimum guaranted alignment in pgd_alloc(). The page table pointers passed + * around in head.S and proc-*.S are shifted by this amount, in order to + * leave spare high bits for systems with physical address extension. This + * does not fully accomodate the 40-bit addressing capability of ARM LPAE, but + * gives us about 38-bits or so. + */ +#ifdef CONFIG_ARM_LPAE +#define ARCH_PGD_SHIFT L1_CACHE_SHIFT +#else +#define ARCH_PGD_SHIFT 0 +#endif +#define ARCH_PGD_MASK ((1 << ARCH_PGD_SHIFT) - 1) + #ifndef __ASSEMBLY__ /* @@ -207,7 +223,7 @@ static inline unsigned long __phys_to_virt(unsigned long x) * direct-mapped view. We assume this is the first page * of RAM in the mem_map as well. */ -#define PHYS_PFN_OFFSET (PHYS_OFFSET >> PAGE_SHIFT) +#define PHYS_PFN_OFFSET ((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT)) /* * These are *only* valid on the kernel direct mapped RAM memory. diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index a7b85e0..b5792b7 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -18,6 +18,7 @@ #include <asm/cacheflush.h> #include <asm/cachetype.h> #include <asm/proc-fns.h> +#include <asm/smp_plat.h> #include <asm-generic/mm_hooks.h> void __check_vmalloc_seq(struct mm_struct *mm); @@ -27,7 +28,15 @@ void __check_vmalloc_seq(struct mm_struct *mm); void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); #define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; }) -DECLARE_PER_CPU(atomic64_t, active_asids); +#ifdef CONFIG_ARM_ERRATA_798181 +void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask); +#else /* !CONFIG_ARM_ERRATA_798181 */ +static inline void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask) +{ +} +#endif /* CONFIG_ARM_ERRATA_798181 */ #else /* !CONFIG_CPU_HAS_ASID */ @@ -98,12 +107,16 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, #ifdef CONFIG_MMU unsigned int cpu = smp_processor_id(); -#ifdef CONFIG_SMP - /* check for possible thread migration */ - if (!cpumask_empty(mm_cpumask(next)) && + /* + * __sync_icache_dcache doesn't broadcast the I-cache invalidation, + * so check for possible thread migration and invalidate the I-cache + * if we're new to this CPU. + */ + if (cache_ops_need_broadcast() && + !cpumask_empty(mm_cpumask(next)) && !cpumask_test_cpu(cpu, mm_cpumask(next))) __flush_icache_all(); -#endif + if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) { check_and_switch_context(next, tsk); if (cache_is_vivt()) diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h new file mode 100644 index 0000000..c3247cc --- /dev/null +++ b/arch/arm/include/asm/mpu.h @@ -0,0 +1,76 @@ +#ifndef __ARM_MPU_H +#define __ARM_MPU_H + +#ifdef CONFIG_ARM_MPU + +/* MPUIR layout */ +#define MPUIR_nU 1 +#define MPUIR_DREGION 8 +#define MPUIR_IREGION 16 +#define MPUIR_DREGION_SZMASK (0xFF << MPUIR_DREGION) +#define MPUIR_IREGION_SZMASK (0xFF << MPUIR_IREGION) + +/* ID_MMFR0 data relevant to MPU */ +#define MMFR0_PMSA (0xF << 4) +#define MMFR0_PMSAv7 (3 << 4) + +/* MPU D/I Size Register fields */ +#define MPU_RSR_SZ 1 +#define MPU_RSR_EN 0 + +/* The D/I RSR value for an enabled region spanning the whole of memory */ +#define MPU_RSR_ALL_MEM 63 + +/* Individual bits in the DR/IR ACR */ +#define MPU_ACR_XN (1 << 12) +#define MPU_ACR_SHARED (1 << 2) + +/* C, B and TEX[2:0] bits only have semantic meanings when grouped */ +#define MPU_RGN_CACHEABLE 0xB +#define MPU_RGN_SHARED_CACHEABLE (MPU_RGN_CACHEABLE | MPU_ACR_SHARED) +#define MPU_RGN_STRONGLY_ORDERED 0 + +/* Main region should only be shared for SMP */ +#ifdef CONFIG_SMP +#define MPU_RGN_NORMAL (MPU_RGN_CACHEABLE | MPU_ACR_SHARED) +#else +#define MPU_RGN_NORMAL MPU_RGN_CACHEABLE +#endif + +/* Access permission bits of ACR (only define those that we use)*/ +#define MPU_AP_PL1RW_PL0RW (0x3 << 8) +#define MPU_AP_PL1RW_PL0R0 (0x2 << 8) +#define MPU_AP_PL1RW_PL0NA (0x1 << 8) + +/* For minimal static MPU region configurations */ +#define MPU_PROBE_REGION 0 +#define MPU_BG_REGION 1 +#define MPU_RAM_REGION 2 +#define MPU_VECTORS_REGION 3 + +/* Maximum number of regions Linux is interested in */ +#define MPU_MAX_REGIONS 16 + +#define MPU_DATA_SIDE 0 +#define MPU_INSTR_SIDE 1 + +#ifndef __ASSEMBLY__ + +struct mpu_rgn { + /* Assume same attributes for d/i-side */ + u32 drbar; + u32 drsr; + u32 dracr; +}; + +struct mpu_rgn_info { + u32 mpuir; + struct mpu_rgn rgns[MPU_MAX_REGIONS]; +}; +extern struct mpu_rgn_info mpu_rgn_info; + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_ARM_MPU */ + +#endif diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index 812a494..6363f3d 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -13,7 +13,7 @@ /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) #ifndef __ASSEMBLY__ diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h index 18f5cef..626989f 100644 --- a/arch/arm/include/asm/pgtable-3level-hwdef.h +++ b/arch/arm/include/asm/pgtable-3level-hwdef.h @@ -30,6 +30,7 @@ #define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) +#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) #define PMD_BIT4 (_AT(pmdval_t, 0)) #define PMD_DOMAIN(x) (_AT(pmdval_t, 0)) #define PMD_APTABLE_SHIFT (61) @@ -41,6 +42,8 @@ */ #define PMD_SECT_BUFFERABLE (_AT(pmdval_t, 1) << 2) #define PMD_SECT_CACHEABLE (_AT(pmdval_t, 1) << 3) +#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ +#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) #define PMD_SECT_nG (_AT(pmdval_t, 1) << 11) @@ -66,6 +69,7 @@ #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) #define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) +#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_BUFFERABLE (_AT(pteval_t, 1) << 2) /* AttrIndx[0] */ #define PTE_CACHEABLE (_AT(pteval_t, 1) << 3) /* AttrIndx[1] */ #define PTE_EXT_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ @@ -79,4 +83,24 @@ #define PHYS_MASK_SHIFT (40) #define PHYS_MASK ((1ULL << PHYS_MASK_SHIFT) - 1) +/* + * TTBR0/TTBR1 split (PAGE_OFFSET): + * 0x40000000: T0SZ = 2, T1SZ = 0 (not used) + * 0x80000000: T0SZ = 0, T1SZ = 1 + * 0xc0000000: T0SZ = 0, T1SZ = 2 + * + * Only use this feature if PHYS_OFFSET <= PAGE_OFFSET, otherwise + * booting secondary CPUs would end up using TTBR1 for the identity + * mapping set up in TTBR0. + */ +#if defined CONFIG_VMSPLIT_2G +#define TTBR1_OFFSET 16 /* skip two L1 entries */ +#elif defined CONFIG_VMSPLIT_3G +#define TTBR1_OFFSET (4096 * (1 + 3)) /* only L2, skip pgd + 3*pmd */ +#else +#define TTBR1_OFFSET 0 +#endif + +#define TTBR1_SIZE (((PAGE_OFFSET >> 30) - 1) << 16) + #endif diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 86b8fe3..5689c18 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -33,7 +33,7 @@ #define PTRS_PER_PMD 512 #define PTRS_PER_PGD 4 -#define PTE_HWTABLE_PTRS (PTRS_PER_PTE) +#define PTE_HWTABLE_PTRS (0) #define PTE_HWTABLE_OFF (0) #define PTE_HWTABLE_SIZE (PTRS_PER_PTE * sizeof(u64)) @@ -48,20 +48,28 @@ #define PMD_SHIFT 21 #define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) +#define PMD_MASK (~((1 << PMD_SHIFT) - 1)) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) +#define PGDIR_MASK (~((1 << PGDIR_SHIFT) - 1)) /* * section address mask and size definitions. */ #define SECTION_SHIFT 21 #define SECTION_SIZE (1UL << SECTION_SHIFT) -#define SECTION_MASK (~(SECTION_SIZE-1)) +#define SECTION_MASK (~((1 << SECTION_SHIFT) - 1)) #define USER_PTRS_PER_PGD (PAGE_OFFSET / PGDIR_SIZE) /* + * Hugetlb definitions. + */ +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + +/* * "Linux" PTE definitions for LPAE. * * These bits overlap with the hardware bits but the naming is preserved for @@ -79,6 +87,11 @@ #define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) /* unused */ #define L_PTE_NONE (_AT(pteval_t, 1) << 57) /* PROT_NONE */ +#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) +#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) +#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56) +#define PMD_SECT_NONE (_AT(pmdval_t, 1) << 57) + /* * To be used in assembly code with the upper page attributes. */ @@ -166,8 +179,83 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) clean_pmd_entry(pmdp); \ } while (0) +/* + * For 3 levels of paging the PTE_EXT_NG bit will be set for user address ptes + * that are written to a page table but not for ptes created with mk_pte. + * + * In hugetlb_no_page, a new huge pte (new_pte) is generated and passed to + * hugetlb_cow, where it is compared with an entry in a page table. + * This comparison test fails erroneously leading ultimately to a memory leak. + * + * To correct this behaviour, we mask off PTE_EXT_NG for any pte that is + * present before running the comparison. + */ +#define __HAVE_ARCH_PTE_SAME +#define pte_same(pte_a,pte_b) ((pte_present(pte_a) ? pte_val(pte_a) & ~PTE_EXT_NG \ + : pte_val(pte_a)) \ + == (pte_present(pte_b) ? pte_val(pte_b) & ~PTE_EXT_NG \ + : pte_val(pte_b))) + #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,__pte(pte_val(pte)|(ext))) +#define pte_huge(pte) (pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT)) +#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) + +#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF) + +#define __HAVE_ARCH_PMD_WRITE +#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) +#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) +#endif + +#define PMD_BIT_FUNC(fn,op) \ +static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; } + +PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF); +PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING); +PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY); +PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); + +#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) + +#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT) +#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) + +/* represent a notpresent pmd by zero, this is used by pmdp_invalidate */ +#define pmd_mknotpresent(pmd) (__pmd(0)) + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | PMD_SECT_RDONLY | + PMD_SECT_VALID | PMD_SECT_NONE; + pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask); + return pmd; +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + BUG_ON(addr >= TASK_SIZE); + + /* create a faulting entry if PROT_NONE protected */ + if (pmd_val(pmd) & PMD_SECT_NONE) + pmd_val(pmd) &= ~PMD_SECT_VALID; + + *pmdp = __pmd(pmd_val(pmd) | PMD_SECT_nG); + flush_pmd_entry(pmdp); +} + +static inline int has_transparent_hugepage(void) +{ + return 1; +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_PGTABLE_3LEVEL_H */ diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 229e0dd..04aeb02 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -24,6 +24,9 @@ #include <asm/memory.h> #include <asm/pgtable-hwdef.h> + +#include <asm/tlbflush.h> + #ifdef CONFIG_ARM_LPAE #include <asm/pgtable-3level.h> #else diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index f3628fb..5324c11 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -60,7 +60,7 @@ extern struct processor { /* * Set the page table */ - void (*switch_mm)(unsigned long pgd_phys, struct mm_struct *mm); + void (*switch_mm)(phys_addr_t pgd_phys, struct mm_struct *mm); /* * Set a possibly extended PTE. Non-extended PTEs should * ignore 'ext'. @@ -82,7 +82,7 @@ extern void cpu_proc_init(void); extern void cpu_proc_fin(void); extern int cpu_do_idle(void); extern void cpu_dcache_clean_area(void *, int); -extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm); +extern void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); #ifdef CONFIG_ARM_LPAE extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte); #else @@ -116,13 +116,25 @@ extern void cpu_resume(void); #define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm) #ifdef CONFIG_ARM_LPAE + +#define cpu_get_ttbr(nr) \ + ({ \ + u64 ttbr; \ + __asm__("mrrc p15, " #nr ", %Q0, %R0, c2" \ + : "=r" (ttbr)); \ + ttbr; \ + }) + +#define cpu_set_ttbr(nr, val) \ + do { \ + u64 ttbr = val; \ + __asm__("mcrr p15, " #nr ", %Q0, %R0, c2" \ + : : "r" (ttbr)); \ + } while (0) + #define cpu_get_pgd() \ ({ \ - unsigned long pg, pg2; \ - __asm__("mrrc p15, 0, %0, %1, c2" \ - : "=r" (pg), "=r" (pg2) \ - : \ - : "cc"); \ + u64 pg = cpu_get_ttbr(0); \ pg &= ~(PTRS_PER_PGD*sizeof(pgd_t)-1); \ (pgd_t *)phys_to_virt(pg); \ }) @@ -137,6 +149,10 @@ extern void cpu_resume(void); }) #endif +#else /*!CONFIG_MMU */ + +#define cpu_switch_mm(pgd,mm) { } + #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index d3a22be..a8cae71c 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -65,7 +65,10 @@ asmlinkage void secondary_start_kernel(void); * Initial data for bringing up a secondary CPU. */ struct secondary_data { - unsigned long pgdir; + union { + unsigned long mpu_rgn_szr; + unsigned long pgdir; + }; unsigned long swapper_pg_dir; void *stack; }; diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index e789832..6462a72 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -26,6 +26,9 @@ static inline bool is_smp(void) } /* all SMP configurations have the extended CPUID registers */ +#ifndef CONFIG_MMU +#define tlb_ops_need_broadcast() 0 +#else static inline int tlb_ops_need_broadcast(void) { if (!is_smp()) @@ -33,6 +36,7 @@ static inline int tlb_ops_need_broadcast(void) return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 2; } +#endif #if !defined(CONFIG_SMP) || __LINUX_ARM_ARCH__ >= 7 #define cache_ops_need_broadcast() 0 @@ -66,4 +70,22 @@ static inline int get_logical_index(u32 mpidr) return -EINVAL; } +/* + * NOTE ! Assembly code relies on the following + * structure memory layout in order to carry out load + * multiple from its base address. For more + * information check arch/arm/kernel/sleep.S + */ +struct mpidr_hash { + u32 mask; /* used by sleep.S */ + u32 shift_aff[3]; /* used by sleep.S */ + u32 bits; +}; + +extern struct mpidr_hash mpidr_hash; + +static inline u32 mpidr_hash_size(void) +{ + return 1 << mpidr_hash.bits; +} #endif diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 6220e9f..f8b8965 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -97,19 +97,22 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) static inline int arch_spin_trylock(arch_spinlock_t *lock) { - unsigned long tmp; + unsigned long contended, res; u32 slock; - __asm__ __volatile__( -" ldrex %0, [%2]\n" -" subs %1, %0, %0, ror #16\n" -" addeq %0, %0, %3\n" -" strexeq %1, %0, [%2]" - : "=&r" (slock), "=&r" (tmp) - : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) - : "cc"); - - if (tmp == 0) { + do { + __asm__ __volatile__( + " ldrex %0, [%3]\n" + " mov %2, #0\n" + " subs %1, %0, %0, ror #16\n" + " addeq %0, %0, %4\n" + " strexeq %2, %0, [%3]" + : "=&r" (slock), "=&r" (contended), "=r" (res) + : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) + : "cc"); + } while (res); + + if (!contended) { smp_mb(); return 1; } else { diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h index 1c0a551..cd20029 100644 --- a/arch/arm/include/asm/suspend.h +++ b/arch/arm/include/asm/suspend.h @@ -1,6 +1,11 @@ #ifndef __ASM_ARM_SUSPEND_H #define __ASM_ARM_SUSPEND_H +struct sleep_save_sp { + u32 *save_ptr_stash; + u32 save_ptr_stash_phys; +}; + extern void cpu_resume(void); extern int cpu_suspend(unsigned long, int (*)(unsigned long)); diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 1995d1a..214d415 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -58,7 +58,7 @@ struct thread_info { struct cpu_context_save cpu_context; /* cpu context */ __u32 syscall; /* syscall number */ __u8 used_cp[16]; /* thread used copro */ - unsigned long tp_value; + unsigned long tp_value[2]; /* TLS registers */ #ifdef CONFIG_CRUNCH struct crunch_state crunchstate; #endif diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index bdf2b84..46e7cfb 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -204,6 +204,12 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, #endif } +static inline void +tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) +{ + tlb_add_flush(tlb, addr); +} + #define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr) #define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr) #define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp) diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index a3625d1..fdbb9e3 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -535,8 +535,33 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, } #endif +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) + #endif -#endif /* CONFIG_MMU */ +#elif defined(CONFIG_SMP) /* !CONFIG_MMU */ + +#ifndef __ASSEMBLY__ + +#include <linux/mm_types.h> + +static inline void local_flush_tlb_all(void) { } +static inline void local_flush_tlb_mm(struct mm_struct *mm) { } +static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { } +static inline void local_flush_tlb_kernel_page(unsigned long kaddr) { } +static inline void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { } +static inline void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) { } +static inline void local_flush_bp_all(void) { } + +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr); +extern void flush_tlb_kernel_page(unsigned long kaddr); +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void flush_bp_all(void); +#endif /* __ASSEMBLY__ */ + +#endif #endif diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h index 73409e6..83259b8 100644 --- a/arch/arm/include/asm/tls.h +++ b/arch/arm/include/asm/tls.h @@ -2,27 +2,30 @@ #define __ASMARM_TLS_H #ifdef __ASSEMBLY__ - .macro set_tls_none, tp, tmp1, tmp2 +#include <asm/asm-offsets.h> + .macro switch_tls_none, base, tp, tpuser, tmp1, tmp2 .endm - .macro set_tls_v6k, tp, tmp1, tmp2 + .macro switch_tls_v6k, base, tp, tpuser, tmp1, tmp2 + mrc p15, 0, \tmp2, c13, c0, 2 @ get the user r/w register mcr p15, 0, \tp, c13, c0, 3 @ set TLS register - mov \tmp1, #0 - mcr p15, 0, \tmp1, c13, c0, 2 @ clear user r/w TLS register + mcr p15, 0, \tpuser, c13, c0, 2 @ and the user r/w register + str \tmp2, [\base, #TI_TP_VALUE + 4] @ save it .endm - .macro set_tls_v6, tp, tmp1, tmp2 + .macro switch_tls_v6, base, tp, tpuser, tmp1, tmp2 ldr \tmp1, =elf_hwcap ldr \tmp1, [\tmp1, #0] mov \tmp2, #0xffff0fff tst \tmp1, #HWCAP_TLS @ hardware TLS available? - mcrne p15, 0, \tp, c13, c0, 3 @ yes, set TLS register - movne \tmp1, #0 - mcrne p15, 0, \tmp1, c13, c0, 2 @ clear user r/w TLS register streq \tp, [\tmp2, #-15] @ set TLS value at 0xffff0ff0 + mrcne p15, 0, \tmp2, c13, c0, 2 @ get the user r/w register + mcrne p15, 0, \tp, c13, c0, 3 @ yes, set TLS register + mcrne p15, 0, \tpuser, c13, c0, 2 @ set user r/w register + strne \tmp2, [\base, #TI_TP_VALUE + 4] @ save it .endm - .macro set_tls_software, tp, tmp1, tmp2 + .macro switch_tls_software, base, tp, tpuser, tmp1, tmp2 mov \tmp1, #0xffff0fff str \tp, [\tmp1, #-15] @ set TLS value at 0xffff0ff0 .endm @@ -31,19 +34,30 @@ #ifdef CONFIG_TLS_REG_EMUL #define tls_emu 1 #define has_tls_reg 1 -#define set_tls set_tls_none +#define switch_tls switch_tls_none #elif defined(CONFIG_CPU_V6) #define tls_emu 0 #define has_tls_reg (elf_hwcap & HWCAP_TLS) -#define set_tls set_tls_v6 +#define switch_tls switch_tls_v6 #elif defined(CONFIG_CPU_32v6K) #define tls_emu 0 #define has_tls_reg 1 -#define set_tls set_tls_v6k +#define switch_tls switch_tls_v6k #else #define tls_emu 0 #define has_tls_reg 0 -#define set_tls set_tls_software +#define switch_tls switch_tls_software #endif +#ifndef __ASSEMBLY__ +static inline unsigned long get_tpuser(void) +{ + unsigned long reg = 0; + + if (has_tls_reg && !tls_emu) + __asm__("mrc p15, 0, %0, c13, c0, 2" : "=r" (reg)); + + return reg; +} +#endif #endif /* __ASMARM_TLS_H */ diff --git a/arch/arm/include/debug/vexpress.S b/arch/arm/include/debug/vexpress.S index dc8e882..acafb22 100644 --- a/arch/arm/include/debug/vexpress.S +++ b/arch/arm/include/debug/vexpress.S @@ -16,6 +16,8 @@ #define DEBUG_LL_PHYS_BASE_RS1 0x1c000000 #define DEBUG_LL_UART_OFFSET_RS1 0x00090000 +#define DEBUG_LL_UART_PHYS_CRX 0xb0090000 + #define DEBUG_LL_VIRT_BASE 0xf8000000 #if defined(CONFIG_DEBUG_VEXPRESS_UART0_DETECT) @@ -67,6 +69,14 @@ #include <asm/hardware/debug-pl01x.S> +#elif defined(CONFIG_DEBUG_VEXPRESS_UART0_CRX) + + .macro addruart,rp,tmp,tmp2 + ldr \rp, =DEBUG_LL_UART_PHYS_CRX + .endm + +#include <asm/hardware/debug-pl01x.S> + #else /* CONFIG_DEBUG_LL_UART_NONE */ .macro addruart, rp, rv, tmp diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h index 3688fd1..6d34d08 100644 --- a/arch/arm/include/uapi/asm/hwcap.h +++ b/arch/arm/include/uapi/asm/hwcap.h @@ -25,6 +25,6 @@ #define HWCAP_IDIVT (1 << 18) #define HWCAP_VFPD32 (1 << 19) /* set if VFP has 32 regs (not 16) */ #define HWCAP_IDIV (HWCAP_IDIVA | HWCAP_IDIVT) - +#define HWCAP_LPAE (1 << 20) #endif /* _UAPI__ASMARM_HWCAP_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index f4285b5..fccfbdb 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -38,7 +38,10 @@ obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o -obj-$(CONFIG_SMP) += smp.o smp_tlb.o +obj-$(CONFIG_SMP) += smp.o +ifdef CONFIG_MMU +obj-$(CONFIG_SMP) += smp_tlb.o +endif obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o obj-$(CONFIG_ARM_ARCH_TIMER) += arch_timer.o diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index ee68cce..ded0417 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -23,6 +23,7 @@ #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/procinfo.h> +#include <asm/suspend.h> #include <asm/hardware/cache-l2x0.h> #include <linux/kbuild.h> @@ -145,6 +146,11 @@ int main(void) #ifdef MULTI_CACHE DEFINE(CACHE_FLUSH_KERN_ALL, offsetof(struct cpu_cache_fns, flush_kern_all)); #endif +#ifdef CONFIG_ARM_CPU_SUSPEND + DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); + DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); + DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); +#endif BLANK(); DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL); DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 582b405..a39cfc2a1 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -685,15 +685,16 @@ ENTRY(__switch_to) UNWIND(.fnstart ) UNWIND(.cantunwind ) add ip, r1, #TI_CPU_SAVE - ldr r3, [r2, #TI_TP_VALUE] ARM( stmia ip!, {r4 - sl, fp, sp, lr} ) @ Store most regs on stack THUMB( stmia ip!, {r4 - sl, fp} ) @ Store most regs on stack THUMB( str sp, [ip], #4 ) THUMB( str lr, [ip], #4 ) + ldr r4, [r2, #TI_TP_VALUE] + ldr r5, [r2, #TI_TP_VALUE + 4] #ifdef CONFIG_CPU_USE_DOMAINS ldr r6, [r2, #TI_CPU_DOMAIN] #endif - set_tls r3, r4, r5 + switch_tls r1, r4, r5, r3, r7 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) ldr r7, [r2, #TI_TASK] ldr r8, =__stack_chk_guard diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 85a72b0..94104bf 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -366,6 +366,16 @@ ENTRY(vector_swi) #endif zero_fp +#ifdef CONFIG_ALIGNMENT_TRAP + ldr ip, __cr_alignment + ldr ip, [ip] + mcr p15, 0, ip, c1, c0 @ update control register +#endif + + enable_irq + ct_user_exit + get_thread_info tsk + /* * Get the system call number. */ @@ -379,9 +389,9 @@ ENTRY(vector_swi) #ifdef CONFIG_ARM_THUMB tst r8, #PSR_T_BIT movne r10, #0 @ no thumb OABI emulation - ldreq r10, [lr, #-4] @ get SWI instruction + USER( ldreq r10, [lr, #-4] ) @ get SWI instruction #else - ldr r10, [lr, #-4] @ get SWI instruction + USER( ldr r10, [lr, #-4] ) @ get SWI instruction #endif #ifdef CONFIG_CPU_ENDIAN_BE8 rev r10, r10 @ little endian instruction @@ -396,22 +406,13 @@ ENTRY(vector_swi) /* Legacy ABI only, possibly thumb mode. */ tst r8, #PSR_T_BIT @ this is SPSR from save_user_regs addne scno, r7, #__NR_SYSCALL_BASE @ put OS number in - ldreq scno, [lr, #-4] + USER( ldreq scno, [lr, #-4] ) #else /* Legacy ABI only. */ - ldr scno, [lr, #-4] @ get SWI instruction + USER( ldr scno, [lr, #-4] ) @ get SWI instruction #endif -#ifdef CONFIG_ALIGNMENT_TRAP - ldr ip, __cr_alignment - ldr ip, [ip] - mcr p15, 0, ip, c1, c0 @ update control register -#endif - enable_irq - ct_user_exit - - get_thread_info tsk adr tbl, sys_call_table @ load syscall table pointer #if defined(CONFIG_OABI_COMPAT) @@ -446,6 +447,21 @@ local_restart: eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back bcs arm_syscall b sys_ni_syscall @ not private func + +#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI) + /* + * We failed to handle a fault trying to access the page + * containing the swi instruction, but we're not really in a + * position to return -EFAULT. Instead, return back to the + * instruction and re-enter the user fault handling path trying + * to page it in. This will likely result in sending SEGV to the + * current task. + */ +9001: + sub lr, lr, #4 + str lr, [sp, #S_PC] + b ret_fast_syscall +#endif ENDPROC(vector_swi) /* diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 8812ce8..75f14cc 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -17,9 +17,12 @@ #include <asm/assembler.h> #include <asm/ptrace.h> #include <asm/asm-offsets.h> +#include <asm/memory.h> #include <asm/cp15.h> #include <asm/thread_info.h> #include <asm/v7m.h> +#include <asm/mpu.h> +#include <asm/page.h> /* * Kernel startup entry point. @@ -63,12 +66,74 @@ ENTRY(stext) movs r10, r5 @ invalid processor (r5=0)? beq __error_p @ yes, error 'p' - adr lr, BSYM(__after_proc_init) @ return (PIC) address +#ifdef CONFIG_ARM_MPU + /* Calculate the size of a region covering just the kernel */ + ldr r5, =PHYS_OFFSET @ Region start: PHYS_OFFSET + ldr r6, =(_end) @ Cover whole kernel + sub r6, r6, r5 @ Minimum size of region to map + clz r6, r6 @ Region size must be 2^N... + rsb r6, r6, #31 @ ...so round up region size + lsl r6, r6, #MPU_RSR_SZ @ Put size in right field + orr r6, r6, #(1 << MPU_RSR_EN) @ Set region enabled bit + bl __setup_mpu +#endif + ldr r13, =__mmap_switched @ address to jump to after + @ initialising sctlr + adr lr, BSYM(1f) @ return (PIC) address ARM( add pc, r10, #PROCINFO_INITFUNC ) THUMB( add r12, r10, #PROCINFO_INITFUNC ) THUMB( mov pc, r12 ) + 1: b __after_proc_init ENDPROC(stext) +#ifdef CONFIG_SMP + __CPUINIT +ENTRY(secondary_startup) + /* + * Common entry point for secondary CPUs. + * + * Ensure that we're in SVC mode, and IRQs are disabled. Lookup + * the processor type - there is no need to check the machine type + * as it has already been validated by the primary processor. + */ + setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 +#ifndef CONFIG_CPU_CP15 + ldr r9, =CONFIG_PROCESSOR_ID +#else + mrc p15, 0, r9, c0, c0 @ get processor id +#endif + bl __lookup_processor_type @ r5=procinfo r9=cpuid + movs r10, r5 @ invalid processor? + beq __error_p @ yes, error 'p' + + adr r4, __secondary_data + ldmia r4, {r7, r12} + +#ifdef CONFIG_ARM_MPU + /* Use MPU region info supplied by __cpu_up */ + ldr r6, [r7] @ get secondary_data.mpu_szr + bl __setup_mpu @ Initialize the MPU +#endif + + adr lr, BSYM(__after_proc_init) @ return address + mov r13, r12 @ __secondary_switched address + ARM( add pc, r10, #PROCINFO_INITFUNC ) + THUMB( add r12, r10, #PROCINFO_INITFUNC ) + THUMB( mov pc, r12 ) +ENDPROC(secondary_startup) + +ENTRY(__secondary_switched) + ldr sp, [r7, #8] @ set up the stack pointer + mov fp, #0 + b secondary_start_kernel +ENDPROC(__secondary_switched) + + .type __secondary_data, %object +__secondary_data: + .long secondary_data + .long __secondary_switched +#endif /* CONFIG_SMP */ + /* * Set the Control Register and Read the process ID. */ @@ -99,10 +164,97 @@ __after_proc_init: #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg #endif /* CONFIG_CPU_CP15 */ - - b __mmap_switched @ clear the BSS and jump - @ to start_kernel + mov pc, r13 ENDPROC(__after_proc_init) .ltorg +#ifdef CONFIG_ARM_MPU + + +/* Set which MPU region should be programmed */ +.macro set_region_nr tmp, rgnr + mov \tmp, \rgnr @ Use static region numbers + mcr p15, 0, \tmp, c6, c2, 0 @ Write RGNR +.endm + +/* Setup a single MPU region, either D or I side (D-side for unified) */ +.macro setup_region bar, acr, sr, side = MPU_DATA_SIDE + mcr p15, 0, \bar, c6, c1, (0 + \side) @ I/DRBAR + mcr p15, 0, \acr, c6, c1, (4 + \side) @ I/DRACR + mcr p15, 0, \sr, c6, c1, (2 + \side) @ I/DRSR +.endm + +/* + * Setup the MPU and initial MPU Regions. We create the following regions: + * Region 0: Use this for probing the MPU details, so leave disabled. + * Region 1: Background region - covers the whole of RAM as strongly ordered + * Region 2: Normal, Shared, cacheable for RAM. From PHYS_OFFSET, size from r6 + * Region 3: Normal, shared, inaccessible from PL0 to protect the vectors page + * + * r6: Value to be written to DRSR (and IRSR if required) for MPU_RAM_REGION +*/ + +ENTRY(__setup_mpu) + + /* Probe for v7 PMSA compliance */ + mrc p15, 0, r0, c0, c1, 4 @ Read ID_MMFR0 + and r0, r0, #(MMFR0_PMSA) @ PMSA field + teq r0, #(MMFR0_PMSAv7) @ PMSA v7 + bne __error_p @ Fail: ARM_MPU on NOT v7 PMSA + + /* Determine whether the D/I-side memory map is unified. We set the + * flags here and continue to use them for the rest of this function */ + mrc p15, 0, r0, c0, c0, 4 @ MPUIR + ands r5, r0, #MPUIR_DREGION_SZMASK @ 0 size d region => No MPU + beq __error_p @ Fail: ARM_MPU and no MPU + tst r0, #MPUIR_nU @ MPUIR_nU = 0 for unified + + /* Setup second region first to free up r6 */ + set_region_nr r0, #MPU_RAM_REGION + isb + /* Full access from PL0, PL1, shared for CONFIG_SMP, cacheable */ + ldr r0, =PHYS_OFFSET @ RAM starts at PHYS_OFFSET + ldr r5,=(MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL) + + setup_region r0, r5, r6, MPU_DATA_SIDE @ PHYS_OFFSET, shared, enabled + beq 1f @ Memory-map not unified + setup_region r0, r5, r6, MPU_INSTR_SIDE @ PHYS_OFFSET, shared, enabled +1: isb + + /* First/background region */ + set_region_nr r0, #MPU_BG_REGION + isb + /* Execute Never, strongly ordered, inaccessible to PL0, rw PL1 */ + mov r0, #0 @ BG region starts at 0x0 + ldr r5,=(MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA) + mov r6, #MPU_RSR_ALL_MEM @ 4GB region, enabled + + setup_region r0, r5, r6, MPU_DATA_SIDE @ 0x0, BG region, enabled + beq 2f @ Memory-map not unified + setup_region r0, r5, r6, MPU_INSTR_SIDE @ 0x0, BG region, enabled +2: isb + + /* Vectors region */ + set_region_nr r0, #MPU_VECTORS_REGION + isb + /* Shared, inaccessible to PL0, rw PL1 */ + mov r0, #CONFIG_VECTORS_BASE @ Cover from VECTORS_BASE + ldr r5,=(MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL) + /* Writing N to bits 5:1 (RSR_SZ) --> region size 2^N+1 */ + mov r6, #(((PAGE_SHIFT - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN) + + setup_region r0, r5, r6, MPU_DATA_SIDE @ VECTORS_BASE, PL0 NA, enabled + beq 3f @ Memory-map not unified + setup_region r0, r5, r6, MPU_INSTR_SIDE @ VECTORS_BASE, PL0 NA, enabled +3: isb + + /* Enable the MPU */ + mrc p15, 0, r0, c1, c0, 0 @ Read SCTLR + bic r0, r0, #CR_BR @ Disable the 'default mem-map' + orr r0, r0, #CR_M @ Set SCTRL.M (MPU on) + mcr p15, 0, r0, c1, c0, 0 @ Enable MPU + isb + mov pc,lr +ENDPROC(__setup_mpu) +#endif #include "head-common.S" diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 8bac553..45e8935 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -156,7 +156,7 @@ ENDPROC(stext) * * Returns: * r0, r3, r5-r7 corrupted - * r4 = physical page table address + * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) */ __create_page_tables: pgtbl r4, r8 @ page table address @@ -331,6 +331,7 @@ __create_page_tables: #endif #ifdef CONFIG_ARM_LPAE sub r4, r4, #0x1000 @ point to the PGD table + mov r4, r4, lsr #ARCH_PGD_SHIFT #endif mov pc, lr ENDPROC(__create_page_tables) @@ -408,7 +409,7 @@ __secondary_data: * r0 = cp#15 control register * r1 = machine ID * r2 = atags or dtb pointer - * r4 = page table pointer + * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) * r9 = processor ID * r13 = *virtual* address to jump to upon completion */ @@ -427,10 +428,7 @@ __enable_mmu: #ifdef CONFIG_CPU_ICACHE_DISABLE bic r0, r0, #CR_I #endif -#ifdef CONFIG_ARM_LPAE - mov r5, #0 - mcrr p15, 0, r4, r5, c2 @ load TTBR0 -#else +#ifndef CONFIG_ARM_LPAE mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 1315c4c..4910232 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -153,6 +153,13 @@ THUMB( orr r7, #(1 << 30) ) @ HSCTLR.TE mrc p15, 4, r7, c14, c1, 0 @ CNTHCTL orr r7, r7, #3 @ PL1PCEN | PL1PCTEN mcr p15, 4, r7, c14, c1, 0 @ CNTHCTL + mov r7, #0 + mcrr p15, 4, r7, r7, c14 @ CNTVOFF + + @ Disable virtual timer in case it was counting + mrc p15, 0, r7, c14, c3, 1 @ CNTV_CTL + bic r7, #1 @ Clear ENABLE + mcr p15, 0, r7, c14, c3, 1 @ CNTV_CTL 1: #endif diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 8c3094d..d9f5cd4 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -569,6 +569,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) return; } + perf_callchain_store(entry, regs->ARM_pc); tail = (struct frame_tail __user *)regs->ARM_fp - 1; while ((entry->nr < PERF_MAX_STACK_DEPTH) && diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 6e8931c..7f1efcd 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -39,6 +39,7 @@ #include <asm/thread_notify.h> #include <asm/stacktrace.h> #include <asm/mach/time.h> +#include <asm/tls.h> #ifdef CONFIG_CC_STACKPROTECTOR #include <linux/stackprotector.h> @@ -374,7 +375,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, clear_ptrace_hw_breakpoint(p); if (clone_flags & CLONE_SETTLS) - thread->tp_value = childregs->ARM_r3; + thread->tp_value[0] = childregs->ARM_r3; + thread->tp_value[1] = get_tpuser(); thread_notify(THREAD_NOTIFY_COPY, thread); diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c index 23a1142..219f1d73 100644 --- a/arch/arm/kernel/psci_smp.c +++ b/arch/arm/kernel/psci_smp.c @@ -68,8 +68,6 @@ void __ref psci_cpu_die(unsigned int cpu) /* We should never return */ panic("psci: cpu %d failed to shutdown\n", cpu); } -#else -#define psci_cpu_die NULL #endif bool __init psci_smp_available(void) @@ -80,5 +78,7 @@ bool __init psci_smp_available(void) struct smp_operations __initdata psci_smp_ops = { .smp_boot_secondary = psci_boot_secondary, +#ifdef CONFIG_HOTPLUG_CPU .cpu_die = psci_cpu_die, +#endif }; diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 03deeff..2bc1514 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request, #endif case PTRACE_GET_THREAD_AREA: - ret = put_user(task_thread_info(child)->tp_value, + ret = put_user(task_thread_info(child)->tp_value[0], datap); break; diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 1c8278d..9b65327 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -367,7 +367,7 @@ void __init early_print(const char *str, ...) static void __init cpuid_init_hwcaps(void) { - unsigned int divide_instrs; + unsigned int divide_instrs, vmsa; if (cpu_architecture() < CPU_ARCH_ARMv7) return; @@ -380,6 +380,11 @@ static void __init cpuid_init_hwcaps(void) case 1: elf_hwcap |= HWCAP_IDIVT; } + + /* LPAE implies atomic ldrd/strd instructions */ + vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0; + if (vmsa >= 5) + elf_hwcap |= HWCAP_LPAE; } static void __init feat_v6_fixup(void) @@ -470,9 +475,82 @@ void __init smp_setup_processor_id(void) for (i = 1; i < nr_cpu_ids; ++i) cpu_logical_map(i) = i == cpu ? 0 : i; + /* + * clear __my_cpu_offset on boot CPU to avoid hang caused by + * using percpu variable early, for example, lockdep will + * access percpu variable inside lock_release + */ + set_my_cpu_offset(0); + printk(KERN_INFO "Booting Linux on physical CPU 0x%x\n", mpidr); } +struct mpidr_hash mpidr_hash; +#ifdef CONFIG_SMP +/** + * smp_build_mpidr_hash - Pre-compute shifts required at each affinity + * level in order to build a linear index from an + * MPIDR value. Resulting algorithm is a collision + * free hash carried out through shifting and ORing + */ +static void __init smp_build_mpidr_hash(void) +{ + u32 i, affinity; + u32 fs[3], bits[3], ls, mask = 0; + /* + * Pre-scan the list of MPIDRS and filter out bits that do + * not contribute to affinity levels, ie they never toggle. + */ + for_each_possible_cpu(i) + mask |= (cpu_logical_map(i) ^ cpu_logical_map(0)); + pr_debug("mask of set bits 0x%x\n", mask); + /* + * Find and stash the last and first bit set at all affinity levels to + * check how many bits are required to represent them. + */ + for (i = 0; i < 3; i++) { + affinity = MPIDR_AFFINITY_LEVEL(mask, i); + /* + * Find the MSB bit and LSB bits position + * to determine how many bits are required + * to express the affinity level. + */ + ls = fls(affinity); + fs[i] = affinity ? ffs(affinity) - 1 : 0; + bits[i] = ls - fs[i]; + } + /* + * An index can be created from the MPIDR by isolating the + * significant bits at each affinity level and by shifting + * them in order to compress the 24 bits values space to a + * compressed set of values. This is equivalent to hashing + * the MPIDR through shifting and ORing. It is a collision free + * hash though not minimal since some levels might contain a number + * of CPUs that is not an exact power of 2 and their bit + * representation might contain holes, eg MPIDR[7:0] = {0x2, 0x80}. + */ + mpidr_hash.shift_aff[0] = fs[0]; + mpidr_hash.shift_aff[1] = MPIDR_LEVEL_BITS + fs[1] - bits[0]; + mpidr_hash.shift_aff[2] = 2*MPIDR_LEVEL_BITS + fs[2] - + (bits[1] + bits[0]); + mpidr_hash.mask = mask; + mpidr_hash.bits = bits[2] + bits[1] + bits[0]; + pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] mask[0x%x] bits[%u]\n", + mpidr_hash.shift_aff[0], + mpidr_hash.shift_aff[1], + mpidr_hash.shift_aff[2], + mpidr_hash.mask, + mpidr_hash.bits); + /* + * 4x is an arbitrary value used to warn on a hash table much bigger + * than expected on most systems. + */ + if (mpidr_hash_size() > 4 * num_possible_cpus()) + pr_warn("Large number of MPIDR hash buckets detected\n"); + sync_cache_w(&mpidr_hash); +} +#endif + static void __init setup_processor(void) { struct proc_info_list *list; @@ -820,6 +898,7 @@ void __init setup_arch(char **cmdline_p) smp_set_ops(mdesc->smp); } smp_init_cpus(); + smp_build_mpidr_hash(); } #endif @@ -892,6 +971,7 @@ static const char *hwcap_str[] = { "vfpv4", "idiva", "idivt", + "lpae", NULL }; diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 296786b..1c16c35 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -392,14 +392,19 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, if (ksig->ka.sa.sa_flags & SA_SIGINFO) idx += 3; + /* + * Put the sigreturn code on the stack no matter which return + * mechanism we use in order to remain ABI compliant + */ if (__put_user(sigreturn_codes[idx], rc) || __put_user(sigreturn_codes[idx+1], rc+1)) return 1; - if (cpsr & MODE32_BIT) { + if ((cpsr & MODE32_BIT) && !IS_ENABLED(CONFIG_ARM_MPU)) { /* * 32-bit code can use the new high-page - * signal return code support. + * signal return code support except when the MPU has + * protected the vectors page from PL0 */ retcode = KERN_SIGRETURN_CODE + (idx << 2) + thumb; } else { diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 987dcf3..db1536b 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -7,6 +7,49 @@ .text /* + * Implementation of MPIDR hash algorithm through shifting + * and OR'ing. + * + * @dst: register containing hash result + * @rs0: register containing affinity level 0 bit shift + * @rs1: register containing affinity level 1 bit shift + * @rs2: register containing affinity level 2 bit shift + * @mpidr: register containing MPIDR value + * @mask: register containing MPIDR mask + * + * Pseudo C-code: + * + *u32 dst; + * + *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 mpidr, u32 mask) { + * u32 aff0, aff1, aff2; + * u32 mpidr_masked = mpidr & mask; + * aff0 = mpidr_masked & 0xff; + * aff1 = mpidr_masked & 0xff00; + * aff2 = mpidr_masked & 0xff0000; + * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2); + *} + * Input registers: rs0, rs1, rs2, mpidr, mask + * Output register: dst + * Note: input and output registers must be disjoint register sets + (eg: a macro instance with mpidr = r1 and dst = r1 is invalid) + */ + .macro compute_mpidr_hash dst, rs0, rs1, rs2, mpidr, mask + and \mpidr, \mpidr, \mask @ mask out MPIDR bits + and \dst, \mpidr, #0xff @ mask=aff0 + ARM( mov \dst, \dst, lsr \rs0 ) @ dst=aff0>>rs0 + THUMB( lsr \dst, \dst, \rs0 ) + and \mask, \mpidr, #0xff00 @ mask = aff1 + ARM( orr \dst, \dst, \mask, lsr \rs1 ) @ dst|=(aff1>>rs1) + THUMB( lsr \mask, \mask, \rs1 ) + THUMB( orr \dst, \dst, \mask ) + and \mask, \mpidr, #0xff0000 @ mask = aff2 + ARM( orr \dst, \dst, \mask, lsr \rs2 ) @ dst|=(aff2>>rs2) + THUMB( lsr \mask, \mask, \rs2 ) + THUMB( orr \dst, \dst, \mask ) + .endm + +/* * Save CPU state for a suspend. This saves the CPU general purpose * registers, and allocates space on the kernel stack to save the CPU * specific registers and some other data for resume. @@ -29,12 +72,18 @@ ENTRY(__cpu_suspend) mov r1, r4 @ size of save block mov r2, r5 @ virtual SP ldr r3, =sleep_save_sp -#ifdef CONFIG_SMP - ALT_SMP(mrc p15, 0, lr, c0, c0, 5) - ALT_UP(mov lr, #0) - and lr, lr, #15 + ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] + ALT_SMP(mrc p15, 0, r9, c0, c0, 5) + ALT_UP_B(1f) + ldr r8, =mpidr_hash + /* + * This ldmia relies on the memory layout of the mpidr_hash + * struct mpidr_hash. + */ + ldmia r8, {r4-r7} @ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts + compute_mpidr_hash lr, r5, r6, r7, r9, r4 add r3, r3, lr, lsl #2 -#endif +1: bl __cpu_suspend_save adr lr, BSYM(cpu_suspend_abort) ldmfd sp!, {r0, pc} @ call suspend fn @@ -81,15 +130,23 @@ ENDPROC(cpu_resume_after_mmu) .data .align ENTRY(cpu_resume) -#ifdef CONFIG_SMP - adr r0, sleep_save_sp - ALT_SMP(mrc p15, 0, r1, c0, c0, 5) - ALT_UP(mov r1, #0) - and r1, r1, #15 - ldr r0, [r0, r1, lsl #2] @ stack phys addr -#else - ldr r0, sleep_save_sp @ stack phys addr -#endif + mov r1, #0 + ALT_SMP(mrc p15, 0, r0, c0, c0, 5) + ALT_UP_B(1f) + adr r2, mpidr_hash_ptr + ldr r3, [r2] + add r2, r2, r3 @ r2 = struct mpidr_hash phys address + /* + * This ldmia relies on the memory layout of the mpidr_hash + * struct mpidr_hash. + */ + ldmia r2, { r3-r6 } @ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts + compute_mpidr_hash r1, r4, r5, r6, r0, r3 +1: + adr r0, _sleep_save_sp + ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] + ldr r0, [r0, r1, lsl #2] + setmode PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1 @ set SVC, irqs off @ load phys pgd, stack, resume fn ARM( ldmia r0!, {r1, sp, pc} ) @@ -98,7 +155,11 @@ THUMB( mov sp, r2 ) THUMB( bx r3 ) ENDPROC(cpu_resume) -sleep_save_sp: - .rept CONFIG_NR_CPUS - .long 0 @ preserve stack phys ptr here - .endr + .align 2 +mpidr_hash_ptr: + .long mpidr_hash - . @ mpidr_hash struct offset + + .type sleep_save_sp, #object +ENTRY(sleep_save_sp) +_sleep_save_sp: + .space SLEEP_SAVE_SP_SZ @ struct sleep_save_sp diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5919eb4..c5fb546 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -45,6 +45,7 @@ #include <asm/smp_plat.h> #include <asm/virt.h> #include <asm/mach/arch.h> +#include <asm/mpu.h> /* * as from 2.5, kernels no longer have an init_tasks structure @@ -78,6 +79,13 @@ void __init smp_set_ops(struct smp_operations *ops) smp_ops = *ops; }; +static unsigned long get_arch_pgd(pgd_t *pgd) +{ + phys_addr_t pgdir = virt_to_phys(pgd); + BUG_ON(pgdir & ARCH_PGD_MASK); + return pgdir >> ARCH_PGD_SHIFT; +} + int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; @@ -87,8 +95,14 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) * its stack and the page tables. */ secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; - secondary_data.pgdir = virt_to_phys(idmap_pgd); - secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir); +#ifdef CONFIG_ARM_MPU + secondary_data.mpu_rgn_szr = mpu_rgn_info.rgns[MPU_RAM_REGION].drsr; +#endif + +#ifdef CONFIG_MMU + secondary_data.pgdir = get_arch_pgd(idmap_pgd); + secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir); +#endif __cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data)); outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1)); @@ -112,9 +126,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) pr_err("CPU%u: failed to boot: %d\n", cpu, ret); } - secondary_data.stack = NULL; - secondary_data.pgdir = 0; + memset(&secondary_data, 0, sizeof(secondary_data)); return ret; } diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index 9a52a07..a98b62d 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -103,7 +103,7 @@ static void broadcast_tlb_a15_erratum(void) static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm) { - int cpu, this_cpu; + int this_cpu; cpumask_t mask = { CPU_BITS_NONE }; if (!erratum_a15_798181()) @@ -111,21 +111,7 @@ static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm) dummy_flush_tlb_a15_erratum(); this_cpu = get_cpu(); - for_each_online_cpu(cpu) { - if (cpu == this_cpu) - continue; - /* - * We only need to send an IPI if the other CPUs are running - * the same ASID as the one being invalidated. There is no - * need for locking around the active_asids check since the - * switch_mm() function has at least one dmb() (as required by - * this workaround) in case a context switch happens on - * another CPU after the condition below. - */ - if (atomic64_read(&mm->context.id) == - atomic64_read(&per_cpu(active_asids, cpu))) - cpumask_set_cpu(cpu, &mask); - } + a15_erratum_get_cpumask(this_cpu, mm, &mask); smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1); put_cpu(); } diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index c59c97e..41cf3cb 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -1,15 +1,54 @@ #include <linux/init.h> +#include <linux/slab.h> +#include <asm/cacheflush.h> #include <asm/idmap.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> #include <asm/memory.h> +#include <asm/smp_plat.h> #include <asm/suspend.h> #include <asm/tlbflush.h> extern int __cpu_suspend(unsigned long, int (*)(unsigned long)); extern void cpu_resume_mmu(void); +#ifdef CONFIG_MMU +/* + * Hide the first two arguments to __cpu_suspend - these are an implementation + * detail which platform code shouldn't have to know about. + */ +int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) +{ + struct mm_struct *mm = current->active_mm; + int ret; + + if (!idmap_pgd) + return -EINVAL; + + /* + * Provide a temporary page table with an identity mapping for + * the MMU-enable code, required for resuming. On successful + * resume (indicated by a zero return code), we need to switch + * back to the correct page tables. + */ + ret = __cpu_suspend(arg, fn); + if (ret == 0) { + cpu_switch_mm(mm->pgd, mm); + local_flush_bp_all(); + local_flush_tlb_all(); + } + + return ret; +} +#else +int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) +{ + return __cpu_suspend(arg, fn); +} +#define idmap_pgd NULL +#endif + /* * This is called by __cpu_suspend() to save the state, and do whatever * flushing is required to ensure that when the CPU goes to sleep we have @@ -47,30 +86,19 @@ void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr) virt_to_phys(save_ptr) + sizeof(*save_ptr)); } -/* - * Hide the first two arguments to __cpu_suspend - these are an implementation - * detail which platform code shouldn't have to know about. - */ -int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) -{ - struct mm_struct *mm = current->active_mm; - int ret; - - if (!idmap_pgd) - return -EINVAL; +extern struct sleep_save_sp sleep_save_sp; - /* - * Provide a temporary page table with an identity mapping for - * the MMU-enable code, required for resuming. On successful - * resume (indicated by a zero return code), we need to switch - * back to the correct page tables. - */ - ret = __cpu_suspend(arg, fn); - if (ret == 0) { - cpu_switch_mm(mm->pgd, mm); - local_flush_bp_all(); - local_flush_tlb_all(); - } +static int cpu_suspend_alloc_sp(void) +{ + void *ctx_ptr; + /* ctx_ptr is an array of physical addresses */ + ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(u32), GFP_KERNEL); - return ret; + if (WARN_ON(!ctx_ptr)) + return -ENOMEM; + sleep_save_sp.save_ptr_stash = ctx_ptr; + sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); + sync_cache_w(&sleep_save_sp); + return 0; } +early_initcall(cpu_suspend_alloc_sp); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 486e12a..cab094c 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -581,7 +581,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) return regs->ARM_r0; case NR(set_tls): - thread->tp_value = regs->ARM_r0; + thread->tp_value[0] = regs->ARM_r0; if (tls_emu) return 0; if (has_tls_reg) { @@ -699,7 +699,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr) int reg = (instr >> 12) & 15; if (reg == 15) return 1; - regs->uregs[reg] = current_thread_info()->tp_value; + regs->uregs[reg] = current_thread_info()->tp_value[0]; regs->ARM_pc += 4; return 0; } diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 3c8f2f0..d43cfb5 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -497,6 +497,10 @@ vcpu .req r0 @ vcpu pointer always in r0 add r5, vcpu, r4 strd r2, r3, [r5] + @ Ensure host CNTVCT == CNTPCT + mov r2, #0 + mcrr p15, 4, r2, r2, c14 @ CNTVOFF + 1: #endif @ Allow physical timer/counter access for the host diff --git a/arch/arm/mach-ebsa110/core.c b/arch/arm/mach-ebsa110/core.c index b13cc74..8a53f34 100644 --- a/arch/arm/mach-ebsa110/core.c +++ b/arch/arm/mach-ebsa110/core.c @@ -116,7 +116,7 @@ static void __init ebsa110_map_io(void) iotable_init(ebsa110_io_desc, ARRAY_SIZE(ebsa110_io_desc)); } -static void __iomem *ebsa110_ioremap_caller(unsigned long cookie, size_t size, +static void __iomem *ebsa110_ioremap_caller(phys_addr_t cookie, size_t size, unsigned int flags, void *caller) { return (void __iomem *)cookie; diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 2d503b3..f5f65b5 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -93,7 +93,7 @@ config SOC_EXYNOS5440 default y depends on ARCH_EXYNOS5 select ARCH_HAS_OPP - select ARM_ARCH_TIMER + select HAVE_ARM_ARCH_TIMER select AUTO_ZRELADDR select MIGHT_HAVE_PCI select PCI_DOMAINS if PCI diff --git a/arch/arm/mach-imx/mm-imx3.c b/arch/arm/mach-imx/mm-imx3.c index 8f0f606..0884ca9 100644 --- a/arch/arm/mach-imx/mm-imx3.c +++ b/arch/arm/mach-imx/mm-imx3.c @@ -65,7 +65,7 @@ static void imx3_idle(void) : "=r" (reg)); } -static void __iomem *imx3_ioremap_caller(unsigned long phys_addr, size_t size, +static void __iomem *imx3_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { if (mtype == MT_DEVICE) { diff --git a/arch/arm/mach-iop13xx/io.c b/arch/arm/mach-iop13xx/io.c index 183dc8b..faaf7d4 100644 --- a/arch/arm/mach-iop13xx/io.c +++ b/arch/arm/mach-iop13xx/io.c @@ -23,7 +23,7 @@ #include "pci.h" -static void __iomem *__iop13xx_ioremap_caller(unsigned long cookie, +static void __iomem *__iop13xx_ioremap_caller(phys_addr_t cookie, size_t size, unsigned int mtype, void *caller) { void __iomem * retval; diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 6600cff..d7223b3 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -559,7 +559,7 @@ void ixp4xx_restart(char mode, const char *cmd) * fallback to the default. */ -static void __iomem *ixp4xx_ioremap_caller(unsigned long addr, size_t size, +static void __iomem *ixp4xx_ioremap_caller(phys_addr_t addr, size_t size, unsigned int mtype, void *caller) { if (!is_pci_memory(addr)) diff --git a/arch/arm/mach-msm/common.h b/arch/arm/mach-msm/common.h index ce8215a..421cf77 100644 --- a/arch/arm/mach-msm/common.h +++ b/arch/arm/mach-msm/common.h @@ -23,7 +23,7 @@ extern void msm_map_msm8x60_io(void); extern void msm_map_msm8960_io(void); extern void msm_map_qsd8x50_io(void); -extern void __iomem *__msm_ioremap_caller(unsigned long phys_addr, size_t size, +extern void __iomem *__msm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller); extern struct smp_operations msm_smp_ops; diff --git a/arch/arm/mach-msm/io.c b/arch/arm/mach-msm/io.c index efa113e..3dc04cc 100644 --- a/arch/arm/mach-msm/io.c +++ b/arch/arm/mach-msm/io.c @@ -168,7 +168,7 @@ void __init msm_map_msm7x30_io(void) } #endif /* CONFIG_ARCH_MSM7X30 */ -void __iomem *__msm_ioremap_caller(unsigned long phys_addr, size_t size, +void __iomem *__msm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { if (mtype == MT_DEVICE) { diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 1fdb462..c7b32a9 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -4,6 +4,7 @@ config ARCH_OMAP config ARCH_OMAP2PLUS bool "TI OMAP2/3/4/5 SoCs with device tree support" if (ARCH_MULTI_V6 || ARCH_MULTI_V7) select ARCH_HAS_CPUFREQ + select ARCH_HAS_BANDGAP select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_OMAP select ARCH_REQUIRE_GPIOLIB diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index db27e8e..3912ce9 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -23,7 +23,7 @@ config ARCH_R8A73A4 select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_GIC select CPU_V7 - select ARM_ARCH_TIMER + select HAVE_ARM_ARCH_TIMER select SH_CLK_CPG select RENESAS_IRQC @@ -59,7 +59,7 @@ config ARCH_R8A7790 select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_GIC select CPU_V7 - select ARM_ARCH_TIMER + select HAVE_ARM_ARCH_TIMER select SH_CLK_CPG select RENESAS_IRQC diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 84d72fc..65c5ae6 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -60,7 +60,7 @@ config ARCH_TEGRA_3x_SOC config ARCH_TEGRA_114_SOC bool "Enable support for Tegra114 family" - select ARM_ARCH_TIMER + select HAVE_ARM_ARCH_TIMER select ARM_GIC select ARM_L1_CACHE_SHIFT_6 select CPU_FREQ_TABLE if CPU_FREQ diff --git a/arch/arm/mach-virt/Kconfig b/arch/arm/mach-virt/Kconfig index 8958f0d..081d469 100644 --- a/arch/arm/mach-virt/Kconfig +++ b/arch/arm/mach-virt/Kconfig @@ -2,7 +2,7 @@ config ARCH_VIRT bool "Dummy Virtual Machine" if ARCH_MULTI_V7 select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_GIC - select ARM_ARCH_TIMER + select HAVE_ARM_ARCH_TIMER select ARM_PSCI select HAVE_SMP select CPU_V7 diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 9e8101e..6cacdc8 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -392,7 +392,8 @@ config CPU_V7 select CPU_CACHE_V7 select CPU_CACHE_VIPT select CPU_COPY_V6 if MMU - select CPU_CP15_MMU + select CPU_CP15_MMU if MMU + select CPU_CP15_MPU if !MMU select CPU_HAS_ASID if MMU select CPU_PABRT_V7 select CPU_TLB_V7 if MMU diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index ee558a0..ecfe6e5 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index c465fac..d70e0ab 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -523,6 +523,147 @@ static void aurora_flush_range(unsigned long start, unsigned long end) } } +/* + * For certain Broadcom SoCs, depending on the address range, different offsets + * need to be added to the address before passing it to L2 for + * invalidation/clean/flush + * + * Section Address Range Offset EMI + * 1 0x00000000 - 0x3FFFFFFF 0x80000000 VC + * 2 0x40000000 - 0xBFFFFFFF 0x40000000 SYS + * 3 0xC0000000 - 0xFFFFFFFF 0x80000000 VC + * + * When the start and end addresses have crossed two different sections, we + * need to break the L2 operation into two, each within its own section. + * For example, if we need to invalidate addresses starts at 0xBFFF0000 and + * ends at 0xC0001000, we need do invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and 2) + * 0xC0000000 - 0xC0001000 + * + * Note 1: + * By breaking a single L2 operation into two, we may potentially suffer some + * performance hit, but keep in mind the cross section case is very rare + * + * Note 2: + * We do not need to handle the case when the start address is in + * Section 1 and the end address is in Section 3, since it is not a valid use + * case + * + * Note 3: + * Section 1 in practical terms can no longer be used on rev A2. Because of + * that the code does not need to handle section 1 at all. + * + */ +#define BCM_SYS_EMI_START_ADDR 0x40000000UL +#define BCM_VC_EMI_SEC3_START_ADDR 0xC0000000UL + +#define BCM_SYS_EMI_OFFSET 0x40000000UL +#define BCM_VC_EMI_OFFSET 0x80000000UL + +static inline int bcm_addr_is_sys_emi(unsigned long addr) +{ + return (addr >= BCM_SYS_EMI_START_ADDR) && + (addr < BCM_VC_EMI_SEC3_START_ADDR); +} + +static inline unsigned long bcm_l2_phys_addr(unsigned long addr) +{ + if (bcm_addr_is_sys_emi(addr)) + return addr + BCM_SYS_EMI_OFFSET; + else + return addr + BCM_VC_EMI_OFFSET; +} + +static void bcm_inv_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2x0_inv_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2x0_inv_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2x0_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +static void bcm_clean_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + if ((end - start) >= l2x0_size) { + l2x0_clean_all(); + return; + } + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2x0_clean_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2x0_clean_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2x0_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +static void bcm_flush_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + if ((end - start) >= l2x0_size) { + l2x0_flush_all(); + return; + } + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2x0_flush_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2x0_flush_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2x0_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + static void __init l2x0_of_setup(const struct device_node *np, u32 *aux_val, u32 *aux_mask) { @@ -765,6 +906,21 @@ static const struct l2x0_of_data aurora_no_outer_data = { }, }; +static const struct l2x0_of_data bcm_l2x0_data = { + .setup = pl310_of_setup, + .save = pl310_save, + .outer_cache = { + .resume = pl310_resume, + .inv_range = bcm_inv_range, + .clean_range = bcm_clean_range, + .flush_range = bcm_flush_range, + .sync = l2x0_cache_sync, + .flush_all = l2x0_flush_all, + .inv_all = l2x0_inv_all, + .disable = l2x0_disable, + }, +}; + static const struct of_device_id l2x0_ids[] __initconst = { { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data }, { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data }, @@ -773,6 +929,8 @@ static const struct of_device_id l2x0_ids[] __initconst = { .data = (void *)&aurora_no_outer_data}, { .compatible = "marvell,aurora-outer-cache", .data = (void *)&aurora_with_outer_data}, + { .compatible = "bcm,bcm11351-a2-pl310-cache", + .data = (void *)&bcm_l2x0_data}, {} }; diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 2ac3737..b55b101 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -20,6 +20,7 @@ #include <asm/smp_plat.h> #include <asm/thread_notify.h> #include <asm/tlbflush.h> +#include <asm/proc-fns.h> /* * On ARMv6, we have the following structure in the Context ID: @@ -39,33 +40,51 @@ * non 64-bit operations. */ #define ASID_FIRST_VERSION (1ULL << ASID_BITS) -#define NUM_USER_ASIDS (ASID_FIRST_VERSION - 1) - -#define ASID_TO_IDX(asid) ((asid & ~ASID_MASK) - 1) -#define IDX_TO_ASID(idx) ((idx + 1) & ~ASID_MASK) +#define NUM_USER_ASIDS ASID_FIRST_VERSION static DEFINE_RAW_SPINLOCK(cpu_asid_lock); static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); -DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); static cpumask_t tlb_flush_pending; +#ifdef CONFIG_ARM_ERRATA_798181 +void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask) +{ + int cpu; + unsigned long flags; + u64 context_id, asid; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + context_id = mm->context.id.counter; + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + /* + * We only need to send an IPI if the other CPUs are + * running the same ASID as the one being invalidated. + */ + asid = per_cpu(active_asids, cpu).counter; + if (asid == 0) + asid = per_cpu(reserved_asids, cpu); + if (context_id == asid) + cpumask_set_cpu(cpu, mask); + } + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); +} +#endif + #ifdef CONFIG_ARM_LPAE static void cpu_set_reserved_ttbr0(void) { - unsigned long ttbl = __pa(swapper_pg_dir); - unsigned long ttbh = 0; - /* * Set TTBR0 to swapper_pg_dir which contains only global entries. The * ASID is set to 0. */ - asm volatile( - " mcrr p15, 0, %0, %1, c2 @ set TTBR0\n" - : - : "r" (ttbl), "r" (ttbh)); + cpu_set_ttbr(0, __pa(swapper_pg_dir)); isb(); } #else @@ -128,7 +147,16 @@ static void flush_context(unsigned int cpu) asid = 0; } else { asid = atomic64_xchg(&per_cpu(active_asids, i), 0); - __set_bit(ASID_TO_IDX(asid), asid_map); + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. + */ + if (asid == 0) + asid = per_cpu(reserved_asids, i); + __set_bit(asid & ~ASID_MASK, asid_map); } per_cpu(reserved_asids, i) = asid; } @@ -167,17 +195,19 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) /* * Allocate a free ASID. If we can't find one, take a * note of the currently active ASIDs and mark the TLBs - * as requiring flushes. + * as requiring flushes. We always count from ASID #1, + * as we reserve ASID #0 to switch via TTBR0 and indicate + * rollover events. */ - asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); if (asid == NUM_USER_ASIDS) { generation = atomic64_add_return(ASID_FIRST_VERSION, &asid_generation); flush_context(cpu); - asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); } __set_bit(asid, asid_map); - asid = generation | IDX_TO_ASID(asid); + asid |= generation; cpumask_clear(mm_cpumask(mm)); } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ef3e0f3..7ec0296 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -250,7 +250,7 @@ static void __dma_free_buffer(struct page *page, size_t size) #ifdef CONFIG_MMU #ifdef CONFIG_HUGETLB_PAGE -#error ARM Coherent DMA allocator does not (yet) support huge TLB +#warning ARM Coherent DMA allocator does not (yet) support huge TLB #endif static void *__alloc_from_contiguous(struct device *dev, size_t size, @@ -880,10 +880,24 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); /* - * Mark the D-cache clean for this page to avoid extra flushing. + * Mark the D-cache clean for these pages to avoid extra flushing. */ - if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) - set_bit(PG_dcache_clean, &page->flags); + if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) { + unsigned long pfn; + size_t left = size; + + pfn = page_to_pfn(page) + off / PAGE_SIZE; + off %= PAGE_SIZE; + if (off) { + pfn++; + left -= PAGE_SIZE - off; + } + while (left >= PAGE_SIZE) { + page = pfn_to_page(pfn++); + set_bit(PG_dcache_clean, &page->flags); + left -= PAGE_SIZE; + } + } } /** diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 5dbf13f..c97f794 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -491,12 +491,14 @@ do_translation_fault(unsigned long addr, unsigned int fsr, * Some section permission faults need to be handled gracefully. * They can happen due to a __{get,put}_user during an oops. */ +#ifndef CONFIG_ARM_LPAE static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { do_bad_area(addr, fsr, regs); return 0; } +#endif /* CONFIG_ARM_LPAE */ /* * This abort handler always returns "fault". diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 32aa586..6d5ba9a 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -17,6 +17,7 @@ #include <asm/highmem.h> #include <asm/smp_plat.h> #include <asm/tlbflush.h> +#include <linux/hugetlb.h> #include "mm.h" @@ -168,19 +169,23 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page) * coherent with the kernels mapping. */ if (!PageHighMem(page)) { - __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); + size_t page_size = PAGE_SIZE << compound_order(page); + __cpuc_flush_dcache_area(page_address(page), page_size); } else { - void *addr; - + unsigned long i; if (cache_is_vipt_nonaliasing()) { - addr = kmap_atomic(page); - __cpuc_flush_dcache_area(addr, PAGE_SIZE); - kunmap_atomic(addr); - } else { - addr = kmap_high_get(page); - if (addr) { + for (i = 0; i < (1 << compound_order(page)); i++) { + void *addr = kmap_atomic(page); __cpuc_flush_dcache_area(addr, PAGE_SIZE); - kunmap_high(page); + kunmap_atomic(addr); + } + } else { + for (i = 0; i < (1 << compound_order(page)); i++) { + void *addr = kmap_high_get(page); + if (addr) { + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high(page); + } } } } @@ -287,7 +292,7 @@ void flush_dcache_page(struct page *page) mapping = page_mapping(page); if (!cache_ops_need_broadcast() && - mapping && !mapping_mapped(mapping)) + mapping && !page_mapped(page)) clear_bit(PG_dcache_clean, &page->flags); else { __flush_dcache_page(mapping, page); diff --git a/arch/arm/mm/fsr-3level.c b/arch/arm/mm/fsr-3level.c index 05a4e94..ab4409a 100644 --- a/arch/arm/mm/fsr-3level.c +++ b/arch/arm/mm/fsr-3level.c @@ -9,11 +9,11 @@ static struct fsr_info fsr_info[] = { { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, { do_bad, SIGBUS, 0, "reserved access flag fault" }, { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, { do_bad, SIGBUS, 0, "reserved permission fault" }, { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, - { do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, { do_bad, SIGBUS, 0, "synchronous external abort" }, { do_bad, SIGBUS, 0, "asynchronous external abort" }, diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c new file mode 100644 index 0000000..3d1e4a2 --- /dev/null +++ b/arch/arm/mm/hugetlbpage.c @@ -0,0 +1,101 @@ +/* + * arch/arm/mm/hugetlbpage.c + * + * Copyright (C) 2012 ARM Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h and Bill Carson's patches + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/err.h> +#include <linux/sysctl.h> +#include <asm/mman.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/pgalloc.h> + +/* + * On ARM, huge pages are backed by pmd's rather than pte's, so we do a lot + * of type casting from pmd_t * to pte_t *. + */ + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, addr); + if (pud_present(*pud)) + pmd = pmd_offset(pud, addr); + } + + return (pte_t *)pmd; +} + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pud_huge(pud_t pud) +{ + return 0; +} + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (pud) + pte = (pte_t *)pmd_alloc(mm, pud, addr); + + return pte; +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pmd); + if (page) + page += ((address & ~PMD_MASK) >> PAGE_SHIFT); + return page; +} + +int pmd_huge(pmd_t pmd) +{ + return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); +} diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 9a5cdc0..2ffee02 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -36,12 +36,13 @@ #include "mm.h" -static unsigned long phys_initrd_start __initdata = 0; +static phys_addr_t phys_initrd_start __initdata = 0; static unsigned long phys_initrd_size __initdata = 0; static int __init early_initrd(char *p) { - unsigned long start, size; + phys_addr_t start; + unsigned long size; char *endp; start = memparse(p, &endp); @@ -350,14 +351,14 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) #ifdef CONFIG_BLK_DEV_INITRD if (phys_initrd_size && !memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) { - pr_err("INITRD: 0x%08lx+0x%08lx is not a memory region - disabling initrd\n", - phys_initrd_start, phys_initrd_size); + pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n", + (u64)phys_initrd_start, phys_initrd_size); phys_initrd_start = phys_initrd_size = 0; } if (phys_initrd_size && memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) { - pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region - disabling initrd\n", - phys_initrd_start, phys_initrd_size); + pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n", + (u64)phys_initrd_start, phys_initrd_size); phys_initrd_start = phys_initrd_size = 0; } if (phys_initrd_size) { @@ -442,7 +443,7 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) { struct page *start_pg, *end_pg; - unsigned long pg, pgend; + phys_addr_t pg, pgend; /* * Convert start_pfn/end_pfn to a struct page pointer. @@ -454,8 +455,8 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn) * Convert to physical addresses, and * round start upwards and end downwards. */ - pg = (unsigned long)PAGE_ALIGN(__pa(start_pg)); - pgend = (unsigned long)__pa(end_pg) & PAGE_MASK; + pg = PAGE_ALIGN(__pa(start_pg)); + pgend = __pa(end_pg) & PAGE_MASK; /* * If there are free pages between these, diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 04d9006..f123d6e 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -331,10 +331,10 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, return (void __iomem *) (offset + addr); } -void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { - unsigned long last_addr; + phys_addr_t last_addr; unsigned long offset = phys_addr & ~PAGE_MASK; unsigned long pfn = __phys_to_pfn(phys_addr); @@ -367,12 +367,12 @@ __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, } EXPORT_SYMBOL(__arm_ioremap_pfn); -void __iomem * (*arch_ioremap_caller)(unsigned long, size_t, +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *) = __arm_ioremap_caller; void __iomem * -__arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype) +__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) { return arch_ioremap_caller(phys_addr, size, mtype, __builtin_return_address(0)); @@ -387,7 +387,7 @@ EXPORT_SYMBOL(__arm_ioremap); * CONFIG_GENERIC_ALLOCATOR for allocating external memory. */ void __iomem * -__arm_ioremap_exec(unsigned long phys_addr, size_t size, bool cached) +__arm_ioremap_exec(phys_addr_t phys_addr, size_t size, bool cached) { unsigned int mtype; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index d1d1cef..d7229d2 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -675,7 +675,8 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, } static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, - unsigned long end, unsigned long phys, const struct mem_type *type) + unsigned long end, phys_addr_t phys, + const struct mem_type *type) { pud_t *pud = pud_offset(pgd, addr); unsigned long next; @@ -989,27 +990,28 @@ phys_addr_t arm_lowmem_limit __initdata = 0; void __init sanity_check_meminfo(void) { int i, j, highmem = 0; + phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1; for (i = 0, j = 0; i < meminfo.nr_banks; i++) { struct membank *bank = &meminfo.bank[j]; - *bank = meminfo.bank[i]; + phys_addr_t size_limit; - if (bank->start > ULONG_MAX) - highmem = 1; + *bank = meminfo.bank[i]; + size_limit = bank->size; -#ifdef CONFIG_HIGHMEM - if (__va(bank->start) >= vmalloc_min || - __va(bank->start) < (void *)PAGE_OFFSET) + if (bank->start >= vmalloc_limit) highmem = 1; + else + size_limit = vmalloc_limit - bank->start; bank->highmem = highmem; +#ifdef CONFIG_HIGHMEM /* * Split those memory banks which are partially overlapping * the vmalloc area greatly simplifying things later. */ - if (!highmem && __va(bank->start) < vmalloc_min && - bank->size > vmalloc_min - __va(bank->start)) { + if (!highmem && bank->size > size_limit) { if (meminfo.nr_banks >= NR_BANKS) { printk(KERN_CRIT "NR_BANKS too low, " "ignoring high memory\n"); @@ -1018,16 +1020,14 @@ void __init sanity_check_meminfo(void) (meminfo.nr_banks - i) * sizeof(*bank)); meminfo.nr_banks++; i++; - bank[1].size -= vmalloc_min - __va(bank->start); - bank[1].start = __pa(vmalloc_min - 1) + 1; + bank[1].size -= size_limit; + bank[1].start = vmalloc_limit; bank[1].highmem = highmem = 1; j++; } - bank->size = vmalloc_min - __va(bank->start); + bank->size = size_limit; } #else - bank->highmem = highmem; - /* * Highmem banks not allowed with !CONFIG_HIGHMEM. */ @@ -1040,31 +1040,16 @@ void __init sanity_check_meminfo(void) } /* - * Check whether this memory bank would entirely overlap - * the vmalloc area. - */ - if (__va(bank->start) >= vmalloc_min || - __va(bank->start) < (void *)PAGE_OFFSET) { - printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx " - "(vmalloc region overlap).\n", - (unsigned long long)bank->start, - (unsigned long long)bank->start + bank->size - 1); - continue; - } - - /* * Check whether this memory bank would partially overlap * the vmalloc area. */ - if (__va(bank->start + bank->size - 1) >= vmalloc_min || - __va(bank->start + bank->size - 1) <= __va(bank->start)) { - unsigned long newsize = vmalloc_min - __va(bank->start); + if (bank->size > size_limit) { printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx " "to -%.8llx (vmalloc region overlap).\n", (unsigned long long)bank->start, (unsigned long long)bank->start + bank->size - 1, - (unsigned long long)bank->start + newsize - 1); - bank->size = newsize; + (unsigned long long)bank->start + size_limit - 1); + bank->size = size_limit; } #endif if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit) diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 5a3aba6..1fa5010 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -8,6 +8,7 @@ #include <linux/pagemap.h> #include <linux/io.h> #include <linux/memblock.h> +#include <linux/kernel.h> #include <asm/cacheflush.h> #include <asm/sections.h> @@ -15,9 +16,260 @@ #include <asm/setup.h> #include <asm/traps.h> #include <asm/mach/arch.h> +#include <asm/cputype.h> +#include <asm/mpu.h> #include "mm.h" +#ifdef CONFIG_ARM_MPU +struct mpu_rgn_info mpu_rgn_info; + +/* Region number */ +static void rgnr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c2, 0" : : "r" (v)); +} + +/* Data-side / unified region attributes */ + +/* Region access control register */ +static void dracr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 4" : : "r" (v)); +} + +/* Region size register */ +static void drsr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 2" : : "r" (v)); +} + +/* Region base address register */ +static void drbar_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 0" : : "r" (v)); +} + +static u32 drbar_read(void) +{ + u32 v; + asm("mrc p15, 0, %0, c6, c1, 0" : "=r" (v)); + return v; +} +/* Optional instruction-side region attributes */ + +/* I-side Region access control register */ +static void iracr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 5" : : "r" (v)); +} + +/* I-side Region size register */ +static void irsr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 3" : : "r" (v)); +} + +/* I-side Region base address register */ +static void irbar_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 1" : : "r" (v)); +} + +static unsigned long irbar_read(void) +{ + unsigned long v; + asm("mrc p15, 0, %0, c6, c1, 1" : "=r" (v)); + return v; +} + +/* MPU initialisation functions */ +void __init sanity_check_meminfo_mpu(void) +{ + int i; + struct membank *bank = meminfo.bank; + phys_addr_t phys_offset = PHYS_OFFSET; + phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size; + + /* Initially only use memory continuous from PHYS_OFFSET */ + if (bank_phys_start(&bank[0]) != phys_offset) + panic("First memory bank must be contiguous from PHYS_OFFSET"); + + /* Banks have already been sorted by start address */ + for (i = 1; i < meminfo.nr_banks; i++) { + if (bank[i].start <= bank_phys_end(&bank[0]) && + bank_phys_end(&bank[i]) > bank_phys_end(&bank[0])) { + bank[0].size = bank_phys_end(&bank[i]) - bank[0].start; + } else { + pr_notice("Ignoring RAM after 0x%.8lx. " + "First non-contiguous (ignored) bank start: 0x%.8lx\n", + (unsigned long)bank_phys_end(&bank[0]), + (unsigned long)bank_phys_start(&bank[i])); + break; + } + } + /* All contiguous banks are now merged in to the first bank */ + meminfo.nr_banks = 1; + specified_mem_size = bank[0].size; + + /* + * MPU has curious alignment requirements: Size must be power of 2, and + * region start must be aligned to the region size + */ + if (phys_offset != 0) + pr_info("PHYS_OFFSET != 0 => MPU Region size constrained by alignment requirements\n"); + + /* + * Maximum aligned region might overflow phys_addr_t if phys_offset is + * 0. Hence we keep everything below 4G until we take the smaller of + * the aligned_region_size and rounded_mem_size, one of which is + * guaranteed to be smaller than the maximum physical address. + */ + aligned_region_size = (phys_offset - 1) ^ (phys_offset); + /* Find the max power-of-two sized region that fits inside our bank */ + rounded_mem_size = (1 << __fls(bank[0].size)) - 1; + + /* The actual region size is the smaller of the two */ + aligned_region_size = aligned_region_size < rounded_mem_size + ? aligned_region_size + 1 + : rounded_mem_size + 1; + + if (aligned_region_size != specified_mem_size) + pr_warn("Truncating memory from 0x%.8lx to 0x%.8lx (MPU region constraints)", + (unsigned long)specified_mem_size, + (unsigned long)aligned_region_size); + + meminfo.bank[0].size = aligned_region_size; + pr_debug("MPU Region from 0x%.8lx size 0x%.8lx (end 0x%.8lx))\n", + (unsigned long)phys_offset, + (unsigned long)aligned_region_size, + (unsigned long)bank_phys_end(&bank[0])); + +} + +static int mpu_present(void) +{ + return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7); +} + +static int mpu_max_regions(void) +{ + /* + * We don't support a different number of I/D side regions so if we + * have separate instruction and data memory maps then return + * whichever side has a smaller number of supported regions. + */ + u32 dregions, iregions, mpuir; + mpuir = read_cpuid(CPUID_MPUIR); + + dregions = iregions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION; + + /* Check for separate d-side and i-side memory maps */ + if (mpuir & MPUIR_nU) + iregions = (mpuir & MPUIR_IREGION_SZMASK) >> MPUIR_IREGION; + + /* Use the smallest of the two maxima */ + return min(dregions, iregions); +} + +static int mpu_iside_independent(void) +{ + /* MPUIR.nU specifies whether there is *not* a unified memory map */ + return read_cpuid(CPUID_MPUIR) & MPUIR_nU; +} + +static int mpu_min_region_order(void) +{ + u32 drbar_result, irbar_result; + /* We've kept a region free for this probing */ + rgnr_write(MPU_PROBE_REGION); + isb(); + /* + * As per ARM ARM, write 0xFFFFFFFC to DRBAR to find the minimum + * region order + */ + drbar_write(0xFFFFFFFC); + drbar_result = irbar_result = drbar_read(); + drbar_write(0x0); + /* If the MPU is non-unified, we use the larger of the two minima*/ + if (mpu_iside_independent()) { + irbar_write(0xFFFFFFFC); + irbar_result = irbar_read(); + irbar_write(0x0); + } + isb(); /* Ensure that MPU region operations have completed */ + /* Return whichever result is larger */ + return __ffs(max(drbar_result, irbar_result)); +} + +static int mpu_setup_region(unsigned int number, phys_addr_t start, + unsigned int size_order, unsigned int properties) +{ + u32 size_data; + + /* We kept a region free for probing resolution of MPU regions*/ + if (number > mpu_max_regions() || number == MPU_PROBE_REGION) + return -ENOENT; + + if (size_order > 32) + return -ENOMEM; + + if (size_order < mpu_min_region_order()) + return -ENOMEM; + + /* Writing N to bits 5:1 (RSR_SZ) specifies region size 2^N+1 */ + size_data = ((size_order - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN; + + dsb(); /* Ensure all previous data accesses occur with old mappings */ + rgnr_write(number); + isb(); + drbar_write(start); + dracr_write(properties); + isb(); /* Propagate properties before enabling region */ + drsr_write(size_data); + + /* Check for independent I-side registers */ + if (mpu_iside_independent()) { + irbar_write(start); + iracr_write(properties); + isb(); + irsr_write(size_data); + } + isb(); + + /* Store region info (we treat i/d side the same, so only store d) */ + mpu_rgn_info.rgns[number].dracr = properties; + mpu_rgn_info.rgns[number].drbar = start; + mpu_rgn_info.rgns[number].drsr = size_data; + return 0; +} + +/* +* Set up default MPU regions, doing nothing if there is no MPU +*/ +void __init mpu_setup(void) +{ + int region_err; + if (!mpu_present()) + return; + + region_err = mpu_setup_region(MPU_RAM_REGION, PHYS_OFFSET, + ilog2(meminfo.bank[0].size), + MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL); + if (region_err) { + panic("MPU region initialization failure! %d", region_err); + } else { + pr_info("Using ARMv7 PMSA Compliant MPU. " + "Region independence: %s, Max regions: %d\n", + mpu_iside_independent() ? "Yes" : "No", + mpu_max_regions()); + } +} +#else +static void sanity_check_meminfo_mpu(void) {} +static void __init mpu_setup(void) {} +#endif /* CONFIG_ARM_MPU */ + void __init arm_mm_memblock_reserve(void) { #ifndef CONFIG_CPU_V7M @@ -37,7 +289,9 @@ void __init arm_mm_memblock_reserve(void) void __init sanity_check_meminfo(void) { - phys_addr_t end = bank_phys_end(&meminfo.bank[meminfo.nr_banks - 1]); + phys_addr_t end; + sanity_check_meminfo_mpu(); + end = bank_phys_end(&meminfo.bank[meminfo.nr_banks - 1]); high_memory = __va(end - 1) + 1; } @@ -48,6 +302,7 @@ void __init sanity_check_meminfo(void) void __init paging_init(struct machine_desc *mdesc) { early_trap_init((void *)CONFIG_VECTORS_BASE); + mpu_setup(); bootmem_init(); } @@ -94,16 +349,16 @@ void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, return __arm_ioremap_pfn(pfn, offset, size, mtype); } -void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size, +void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) { return (void __iomem *)phys_addr; } EXPORT_SYMBOL(__arm_ioremap); -void __iomem * (*arch_ioremap_caller)(unsigned long, size_t, unsigned int, void *); +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); -void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { return __arm_ioremap(phys_addr, size, mtype); diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 919405e..2d1ef87 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -140,8 +140,10 @@ ENTRY(cpu_v6_set_pte_ext) ENTRY(cpu_v6_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID +#ifdef CONFIG_MMU mrc p15, 0, r5, c3, c0, 0 @ Domain ID mrc p15, 0, r6, c2, c0, 1 @ Translation table base 1 +#endif mrc p15, 0, r7, c1, c0, 1 @ auxiliary control register mrc p15, 0, r8, c1, c0, 2 @ co-processor access control mrc p15, 0, r9, c1, c0, 0 @ control register @@ -158,14 +160,16 @@ ENTRY(cpu_v6_do_resume) mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID ldmia r0, {r4 - r9} mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID +#ifdef CONFIG_MMU mcr p15, 0, r5, c3, c0, 0 @ Domain ID ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) ALT_UP(orr r1, r1, #TTB_FLAGS_UP) mcr p15, 0, r1, c2, c0, 0 @ Translation table base 0 mcr p15, 0, r6, c2, c0, 1 @ Translation table base 1 + mcr p15, 0, ip, c2, c0, 2 @ TTB control register +#endif mcr p15, 0, r7, c1, c0, 1 @ auxiliary control register mcr p15, 0, r8, c1, c0, 2 @ co-processor access control - mcr p15, 0, ip, c2, c0, 2 @ TTB control register mcr p15, 0, ip, c7, c5, 4 @ ISB mov r0, r9 @ control register b cpu_resume_mmu diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 363027e..5ffe195 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -39,6 +39,14 @@ #define TTB_FLAGS_SMP (TTB_IRGN_WBWA|TTB_S|TTB_RGN_OC_WBWA) #define PMD_FLAGS_SMP (PMD_SECT_WBWA|PMD_SECT_S) +#ifndef __ARMEB__ +# define rpgdl r0 +# define rpgdh r1 +#else +# define rpgdl r1 +# define rpgdh r0 +#endif + /* * cpu_v7_switch_mm(pgd_phys, tsk) * @@ -47,10 +55,10 @@ */ ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_MMU - mmid r1, r1 @ get mm->context.id - asid r3, r1 - mov r3, r3, lsl #(48 - 32) @ ASID - mcrr p15, 0, r0, r3, c2 @ set TTB 0 + mmid r2, r2 + asid r2, r2 + orr rpgdh, rpgdh, r2, lsl #(48 - 32) @ upper 32-bits of pgd + mcrr p15, 0, rpgdl, rpgdh, c2 @ set TTB 0 isb #endif mov pc, lr @@ -106,7 +114,8 @@ ENDPROC(cpu_v7_set_pte_ext) */ .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address - cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? (branch below) + mov \tmp, \tmp, lsr #ARCH_PGD_SHIFT + cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register orr \tmp, \tmp, #TTB_EAE ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP) @@ -114,27 +123,21 @@ ENDPROC(cpu_v7_set_pte_ext) ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP << 16) ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP << 16) /* - * TTBR0/TTBR1 split (PAGE_OFFSET): - * 0x40000000: T0SZ = 2, T1SZ = 0 (not used) - * 0x80000000: T0SZ = 0, T1SZ = 1 - * 0xc0000000: T0SZ = 0, T1SZ = 2 - * - * Only use this feature if PHYS_OFFSET <= PAGE_OFFSET, otherwise - * booting secondary CPUs would end up using TTBR1 for the identity - * mapping set up in TTBR0. + * Only use split TTBRs if PHYS_OFFSET <= PAGE_OFFSET (cmp above), + * otherwise booting secondary CPUs would end up using TTBR1 for the + * identity mapping set up in TTBR0. */ - bhi 9001f @ PHYS_OFFSET > PAGE_OFFSET? - orr \tmp, \tmp, #(((PAGE_OFFSET >> 30) - 1) << 16) @ TTBCR.T1SZ -#if defined CONFIG_VMSPLIT_2G - /* PAGE_OFFSET == 0x80000000, T1SZ == 1 */ - add \ttbr1, \ttbr1, #1 << 4 @ skip two L1 entries -#elif defined CONFIG_VMSPLIT_3G - /* PAGE_OFFSET == 0xc0000000, T1SZ == 2 */ - add \ttbr1, \ttbr1, #4096 * (1 + 3) @ only L2 used, skip pgd+3*pmd -#endif - /* CONFIG_VMSPLIT_1G does not need TTBR1 adjustment */ -9001: mcr p15, 0, \tmp, c2, c0, 2 @ TTB control register - mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ + mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR + mov \tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits + mov \ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT @ lower bits + addls \ttbr1, \ttbr1, #TTBR1_OFFSET + mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + mov \tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits + mov \ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT @ lower bits + mcrr p15, 0, \ttbr0, \zero, c2 @ load TTBR0 + mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + mcrr p15, 0, \ttbr0, \zero, c2 @ load TTBR0 .endm __CPUINIT diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index e35fec3..7ef3ad0 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -98,9 +98,11 @@ ENTRY(cpu_v7_do_suspend) mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID stmia r0!, {r4 - r5} +#ifdef CONFIG_MMU mrc p15, 0, r6, c3, c0, 0 @ Domain ID mrc p15, 0, r7, c2, c0, 1 @ TTB 1 mrc p15, 0, r11, c2, c0, 2 @ TTB control register +#endif mrc p15, 0, r8, c1, c0, 0 @ Control register mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control @@ -110,13 +112,14 @@ ENDPROC(cpu_v7_do_suspend) ENTRY(cpu_v7_do_resume) mov ip, #0 - mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID ldmia r0!, {r4 - r5} mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID mcr p15, 0, r5, c13, c0, 3 @ User r/o thread ID ldmia r0, {r6 - r11} +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs mcr p15, 0, r6, c3, c0, 0 @ Domain ID #ifndef CONFIG_ARM_LPAE ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) @@ -125,14 +128,15 @@ ENTRY(cpu_v7_do_resume) mcr p15, 0, r1, c2, c0, 0 @ TTB 0 mcr p15, 0, r7, c2, c0, 1 @ TTB 1 mcr p15, 0, r11, c2, c0, 2 @ TTB control register - mrc p15, 0, r4, c1, c0, 1 @ Read Auxiliary control register - teq r4, r9 @ Is it already set? - mcrne p15, 0, r9, c1, c0, 1 @ No, so write it - mcr p15, 0, r10, c1, c0, 2 @ Co-processor access control ldr r4, =PRRR @ PRRR ldr r5, =NMRR @ NMRR mcr p15, 0, r4, c10, c2, 0 @ write PRRR mcr p15, 0, r5, c10, c2, 1 @ write NMRR +#endif /* CONFIG_MMU */ + mrc p15, 0, r4, c1, c0, 1 @ Read Auxiliary control register + teq r4, r9 @ Is it already set? + mcrne p15, 0, r9, c1, c0, 1 @ No, so write it + mcr p15, 0, r10, c1, c0, 2 @ Co-processor access control isb dsb mov r0, r8 @ control register @@ -178,7 +182,8 @@ ENDPROC(cpu_pj4b_do_idle) */ __v7_ca5mp_setup: __v7_ca9mp_setup: - mov r10, #(1 << 0) @ TLB ops broadcasting +__v7_cr7mp_setup: + mov r10, #(1 << 0) @ Cache/TLB ops broadcasting b 1f __v7_ca7mp_setup: __v7_ca15mp_setup: @@ -443,6 +448,16 @@ __v7_pj4b_proc_info: #endif /* + * ARM Ltd. Cortex R7 processor. + */ + .type __v7_cr7mp_proc_info, #object +__v7_cr7mp_proc_info: + .long 0x410fc170 + .long 0xff0ffff0 + __v7_proc __v7_cr7mp_setup + .size __v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info + + /* * ARM Ltd. Cortex A7 processor. */ .type __v7_ca7mp_proc_info, #object diff --git a/arch/arm/plat-versatile/headsmp.S b/arch/arm/plat-versatile/headsmp.S index b178d44..2677bc3 100644 --- a/arch/arm/plat-versatile/headsmp.S +++ b/arch/arm/plat-versatile/headsmp.S @@ -11,8 +11,6 @@ #include <linux/linkage.h> #include <linux/init.h> - __INIT - /* * Realview/Versatile Express specific entry point for secondary CPUs. * This provides a "holding pen" into which all secondary cores are held diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index bf6ab242..d56ed11 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -110,16 +110,6 @@ static inline void __cpuinit arch_counter_set_user_access(void) asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl)); } -static inline u64 arch_counter_get_cntpct(void) -{ - u64 cval; - - isb(); - asm volatile("mrs %0, cntpct_el0" : "=r" (cval)); - - return cval; -} - static inline u64 arch_counter_get_cntvct(void) { u64 cval; |