From de82ecaee5587ea2f8c5c4d0713862530513329c Mon Sep 17 00:00:00 2001 From: ian Date: Sun, 24 Jan 2016 20:15:52 +0000 Subject: MFC r290647, r292523, r292891: ARM: Improve robustness of locore_v6.S and fix errors. - boot page table is not allocated in data section, so must be cleared before use - map only one section (1 MB) for SOCDEV mapping (*) - DSB must be used to ensure that TLB operations have finished - Invalidate BTB when appropriate Allow armv4/5 kernels to be loaded on any 2MB boundary, like armv6/7. This eliminates the reliance on PHYSADDR and KERNPHYSADDR compile-time symbols (except when the rom-copy code is enabled) by using the current PC and the assumption that the entry-point routine is in the first 1MB section of the text segment. Other cleanups done: - Reduce the initarm() stack size back to 2K. It got increased to 4 * 2K when this file was supporting multicore armv6, but that support is now in locore-v6.S. - When building the temporary startup page tables, map the entire 4GB address space as VA=PA before mapping the kernel at its loaded location. This allows access to boot parameters stored somewhere in ram by the bootloader, regardless of where that may be. - When building the page table entry for supporting EARLY_PRINTF, map the section as uncached unbuffered, since it is presumably device registers. Note that this restores the ability to use loader(8)/ubldr on armv4/5 kernels. That was broken in r283035, the point at which ubldr started loading an arm kernel at any 2MB boundary. Also note that after this, there is no reason to set KERNVIRTADDR to anything other than 0xc0000000, and no need for PHYSADDR or KERNPHYSADDR symbols at all. Bring some of the recent locore-v4.S improvements into locore-V6... - Map all 4GB as VA=PA so that args passed in from a bootloader can be accessed regardless of where they are. - Figure out the kernel load address by directly masking the PC rather than by doing pc-relative math on the _start symbol. 
- For EARLY_PRINTF support, map device memory as uncacheable (no-op for ARM_NEW_PMAP because all TEX types resolve to uncacheable). --- sys/arm/arm/locore-v4.S | 86 +++++++++++++++++++++++++++---------------------- sys/arm/arm/locore-v6.S | 69 ++++++++++++++++++++++++++++----------- 2 files changed, 98 insertions(+), 57 deletions(-) (limited to 'sys/arm') diff --git a/sys/arm/arm/locore-v4.S b/sys/arm/arm/locore-v4.S index cc4f636..c46c0bf 100644 --- a/sys/arm/arm/locore-v4.S +++ b/sys/arm/arm/locore-v4.S @@ -42,19 +42,8 @@ __FBSDID("$FreeBSD$"); -/* - * Sanity check the configuration. - * FLASHADDR and LOADERRAMADDR depend on PHYSADDR in some cases. - * ARMv4 and ARMv5 make assumptions on where they are loaded. - * - * TODO: Fix the ARMv4/v5 case. - */ -#ifndef PHYSADDR -#error PHYSADDR must be defined for this configuration -#endif - -/* What size should this really be ? It is only used by initarm() */ -#define INIT_ARM_STACK_SIZE (2048 * 4) +/* 2K initial stack is plenty, it is only used by initarm() */ +#define INIT_ARM_STACK_SIZE 2048 #define CPWAIT_BRANCH \ sub pc, pc, #4 @@ -109,6 +98,16 @@ ASENTRY_NP(_start) msr cpsr_c, r7 #if defined (FLASHADDR) && defined(LOADERRAMADDR) +/* + * Sanity check the configuration. + * FLASHADDR and LOADERRAMADDR depend on PHYSADDR in some cases. + * ARMv4 and ARMv5 make assumptions on where they are loaded. + * TODO: Fix the ARMv4/v5 case. + */ +#ifndef PHYSADDR +#error PHYSADDR must be defined for this configuration +#endif + /* Check if we're running from flash. */ ldr r7, =FLASHADDR /* @@ -164,37 +163,45 @@ Lunmapped: * Build page table from scratch. */ - /* Find the delta between VA and PA */ + /* + * Figure out the physical address we're loaded at by assuming this + * entry point code is in the first L1 section and so if we clear the + * offset bits of the pc that will give us the section-aligned load + * address, which remains in r5 throughout all the following code. 
+ */ + ldr r2, =(L1_S_OFFSET) + bic r5, pc, r2 + + /* Find the delta between VA and PA, result stays in r0 throughout. */ adr r0, Lpagetable bl translate_va_to_pa - /* - * Some of the older ports (the various XScale, mostly) assume - * that the memory before the kernel is mapped, and use it for - * the various stacks, page tables, etc. For those CPUs, map the - * 64 first MB of RAM, as it used to be. - */ - /* - * Map PA == VA + /* + * First map the entire 4GB address space as VA=PA. It's mapped as + * normal (cached) memory because it's for things like accessing the + * parameters passed in from the bootloader, which might be at any + * physical address, different for every platform. */ - ldr r5, =PHYSADDR - mov r1, r5 - mov r2, r5 - /* Map 64MiB, preserved over calls to build_pagetables */ - mov r3, #64 + mov r1, #0 + mov r2, #0 + mov r3, #4096 bl build_pagetables - /* Create the kernel map to jump to */ + /* + * Next we do 64MiB starting at the physical load address, mapped to + * the VA the kernel is linked for. + */ mov r1, r5 - ldr r2, =(KERNBASE) + ldr r2, =(KERNVIRTADDR) + mov r3, #64 bl build_pagetables - ldr r5, =(KERNPHYSADDR) + /* Create a device mapping for early_printf if specified. */ #if defined(SOCDEV_PA) && defined(SOCDEV_VA) - /* Create the custom map */ ldr r1, =SOCDEV_PA ldr r2, =SOCDEV_VA - bl build_pagetables + mov r3, #1 + bl build_device_pagetables #endif mcr p15, 0, r0, c2, c0, 0 /* Set TTB */ @@ -205,9 +212,6 @@ Lunmapped: mcr p15, 0, r0, c3, c0, 0 /* * Enable MMU. - * On armv6 enable extended page tables, and set alignment checking - * to modulo-4 (CPU_CONTROL_UNAL_ENABLE) for the ldrd/strd - * instructions emitted by clang. */ mrc p15, 0, r0, c1, c0, 0 orr r0, r0, #(CPU_CONTROL_MMU_ENABLE) @@ -217,6 +221,9 @@ Lunmapped: nop CPWAIT(r0) + /* Transition the PC from physical to virtual addressing. 
*/ + ldr pc,=mmu_done + mmu_done: nop adr r1, .Lstart @@ -227,7 +234,6 @@ mmu_done: str r3, [r1], #0x0004 /* get zero init data */ subs r2, r2, #4 bgt .L1 - ldr pc, .Lvirt_done virt_done: mov r1, #28 /* loader info size is 28 bytes also second arg */ @@ -301,21 +307,25 @@ translate_va_to_pa: * * Addresses must be 1MiB aligned */ +build_device_pagetables: + ldr r4, =(L1_TYPE_S|L1_S_AP(AP_KRW)) + b 1f build_pagetables: /* Set the required page attributed */ ldr r4, =(L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)) +1: orr r1, r4 /* Move the virtual address to the correct bit location */ lsr r2, #(L1_S_SHIFT - 2) mov r4, r3 -1: +2: str r1, [r0, r2] add r2, r2, #4 add r1, r1, #(L1_S_SIZE) adds r4, r4, #-1 - bhi 1b + bhi 2b RET diff --git a/sys/arm/arm/locore-v6.S b/sys/arm/arm/locore-v6.S index 7d5ba97..510af6c 100644 --- a/sys/arm/arm/locore-v6.S +++ b/sys/arm/arm/locore-v6.S @@ -103,46 +103,60 @@ ASENTRY_NP(_start) orr r7, #CPU_CONTROL_AFLT_ENABLE orr r7, #CPU_CONTROL_VECRELOC mcr CP15_SCTLR(r7) + DSB ISB bl dcache_inv_poc_all mcr CP15_ICIALLU + DSB ISB /* * Build page table from scratch. */ - /* Calculate the physical address of the startup pagetable. */ + /* + * Figure out the physical address we're loaded at by assuming this + * entry point code is in the first L1 section and so if we clear the + * offset bits of the pc that will give us the section-aligned load + * address, which remains in r5 throughout all the following code. + */ + ldr r2, =(L1_S_OFFSET) + bic r5, pc, r2 + + /* Find the delta between VA and PA, result stays in r0 throughout. */ adr r0, Lpagetable bl translate_va_to_pa - /* - * Map PA == VA + /* + * First map the entire 4GB address space as VA=PA. It's mapped as + * normal (cached) memory because it's for things like accessing the + * parameters passed in from the bootloader, which might be at any + * physical address, different for every platform. 
*/ - /* Find the start kernels load address */ - adr r5, _start - ldr r2, =(PTE1_OFFSET) - bic r5, r2 - mov r1, r5 - mov r2, r5 - /* Map 64MiB, preserved over calls to build_pagetables */ - mov r3, #64 + mov r1, #0 + mov r2, #0 + mov r3, #4096 bl build_pagetables - /* Create the kernel map to jump to */ + /* + * Next we do 64MiB starting at the physical load address, mapped to + * the VA the kernel is linked for. + */ mov r1, r5 ldr r2, =(KERNVIRTADDR) + mov r3, #64 bl build_pagetables + /* Create a device mapping for early_printf if specified. */ #if defined(SOCDEV_PA) && defined(SOCDEV_VA) - /* Create the custom map used for early_printf(). */ ldr r1, =SOCDEV_PA ldr r2, =SOCDEV_VA - bl build_pagetables + mov r3, #1 + bl build_device_pagetables #endif bl init_mmu - /* Switch to virtual addresses. */ + /* Transition the PC from physical to virtual addressing. */ ldr pc, =1f 1: @@ -261,7 +275,9 @@ ASENTRY_NP(init_mmu) ISB mcr CP15_TLBIALL /* Flush TLB */ mcr CP15_BPIALL /* Flush Branch predictor */ + DSB ISB + mov pc, lr END(init_mmu) @@ -289,6 +305,7 @@ ASENTRY_NP(reinit_mmu) bl dcache_inv_pou_all #endif mcr CP15_ICIALLU + DSB ISB /* Set auxiliary register */ @@ -297,6 +314,7 @@ ASENTRY_NP(reinit_mmu) eor r8, r8, r6 /* Set bits */ teq r7, r8 mcrne CP15_ACTLR(r8) + DSB ISB /* Enable caches. 
*/ @@ -311,8 +329,8 @@ ASENTRY_NP(reinit_mmu) DSB ISB - /* Flush all TLBs */ - mcr CP15_TLBIALL + mcr CP15_TLBIALL /* Flush TLB */ + mcr CP15_BPIALL /* Flush Branch predictor */ DSB ISB @@ -323,6 +341,7 @@ ASENTRY_NP(reinit_mmu) bl dcache_inv_pou_all #endif mcr CP15_ICIALLU + DSB ISB pop {r4-r11, pc} @@ -339,6 +358,15 @@ END(reinit_mmu) * * Addresses must be 1MiB aligned */ +build_device_pagetables: +#if defined(ARM_NEW_PMAP) + ldr r4, =PTE1_V|PTE1_A|PTE1_AP_KRW|TEX1_CLASS_0 +#elif defined(SMP) + ldr r4, =(L1_TYPE_S|L1_S_AP(AP_KRW)|L1_SHARED) +#else + ldr r4, =(L1_TYPE_S|L1_S_AP(AP_KRW)) +#endif + b 1f build_pagetables: /* Set the required page attributed */ #if defined(ARM_NEW_PMAP) @@ -348,18 +376,19 @@ build_pagetables: #else ldr r4, =(L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)) #endif +1: orr r1, r4 /* Move the virtual address to the correct bit location */ lsr r2, #(PTE1_SHIFT - 2) mov r4, r3 -1: +2: str r1, [r0, r2] add r2, r2, #4 add r1, r1, #(PTE1_SIZE) adds r4, r4, #-1 - bhi 1b + bhi 2b mov pc, lr @@ -412,11 +441,13 @@ ASENTRY_NP(mpentry) orr r0, #CPU_CONTROL_AFLT_ENABLE orr r0, #CPU_CONTROL_VECRELOC mcr CP15_SCTLR(r0) + DSB ISB /* Invalidate L1 cache I+D cache */ bl dcache_inv_pou_all mcr CP15_ICIALLU + DSB ISB /* Find the delta between VA and PA */ -- cgit v1.1