From f52bb722547f43caeaecbcc62db9f3c3b80ead9b Mon Sep 17 00:00:00 2001
From: Sricharan R
Date: Mon, 29 Jul 2013 20:26:22 +0530
Subject: ARM: mm: Correct virt_to_phys patching for 64 bit physical addresses

The current phys_to_virt patching mechanism works only for 32 bit
physical addresses, and this patch extends the idea to 64 bit physical
addresses.

The 64 bit v2p patching mechanism patches the higher 8 bits of the
physical address with a constant using a 'mov' instruction, and the
lower 32 bits are patched using 'add'. While this is correct, on
platforms where the lowmem addressable physical memory spans the 4 GB
boundary, the addition of the lower 32 bits can produce a carry bit.
This carry has to be taken into account and added into the upper 32
bits.

The patched __pv_offset and the VA are added in the lower 32 bits,
where __pv_offset can be in two's complement form when
PA_START < VA_START, and that can result in a false carry bit.

e.g.
    1) PA = 0x80000000; VA = 0xC0000000
       __pv_offset = PA - VA = 0xC0000000 (2's complement)

    2) PA = 0x2 80000000; VA = 0xC0000000
       __pv_offset = PA - VA = 0x1 C0000000

So adding __pv_offset + VA should never result in a true overflow for
(1). In order to differentiate a true carry, __pv_offset is extended to
64 bits and the upper 32 bits hold 0xffffffff when __pv_offset is in
two's complement form; for the same reason, an 'mvn #0' is patched in
instead of a 'mov'.

Since the mov, add and sub instructions are to be patched with
different constants inside the same stub, the rotation field of the
opcode is used to differentiate between them.

So the above examples for the v2p translation become, for
VA = 0xC0000000:

    1) PA[63:32] = 0xffffffff
       PA[31:0]  = VA + 0xC0000000 --> results in a carry
       PA[63:32] = PA[63:32] + carry

       PA[63:0]  = 0x0 80000000

    2) PA[63:32] = 0x1
       PA[31:0]  = VA + 0xC0000000 --> results in a carry
       PA[63:32] = PA[63:32] + carry

       PA[63:0]  = 0x2 80000000

The above ideas were suggested by Nicolas Pitre as part of the review
of the first and second versions of the subject patch.

There is no corresponding change on the phys_to_virt() side, because
computations on the upper 32 bits would be discarded anyway.

Cc: Russell King
Reviewed-by: Nicolas Pitre
Signed-off-by: Sricharan R
Signed-off-by: Santosh Shilimkar
---
 arch/arm/kernel/head.S | 63 +++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 47 insertions(+), 16 deletions(-)
(limited to 'arch/arm/kernel/head.S')
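
To make the carry handling described above concrete, here is a small stand-alone C model of the patched 64-bit translation sequence. This is an editorial sketch, not code from this series; the function name model_virt_to_phys and the use of host C arithmetic are invented for illustration.

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Model of the patched virt_to_phys sequence described above: the
	 * low 32 bits of __pv_offset are added to the VA, and the carry out
	 * of that addition is folded into the high word, which is 0xffffffff
	 * when __pv_offset is in two's complement form.
	 */
	static uint64_t model_virt_to_phys(uint32_t va, uint64_t pv_offset)
	{
		uint32_t off_lo = (uint32_t)pv_offset;
		uint32_t off_hi = (uint32_t)(pv_offset >> 32);	/* 0xffffffff if offset is negative */

		uint32_t pa_lo = va + off_lo;
		uint32_t carry = (pa_lo < va);			/* carry out of the 32-bit add */
		uint32_t pa_hi = off_hi + carry;		/* 'adc' with a zero VA high word */

		return ((uint64_t)pa_hi << 32) | pa_lo;
	}

	int main(void)
	{
		/* Example 1: PA = 0x80000000, VA = 0xC0000000 -> offset = 0xffffffff_c0000000 */
		printf("%#llx\n", (unsigned long long)model_virt_to_phys(0xc0000000, 0xffffffffc0000000ULL));
		/* Example 2: PA = 0x2_80000000, VA = 0xC0000000 -> offset = 0x1_c0000000 */
		printf("%#llx\n", (unsigned long long)model_virt_to_phys(0xc0000000, 0x1c0000000ULL));
		return 0;
	}

Running this prints 0x80000000 and 0x280000000, matching the two worked examples in the commit message.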
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 2c7cc1e..5454794 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -536,6 +536,14 @@ ENTRY(fixup_smp)
 	ldmfd	sp!, {r4 - r6, pc}
 ENDPROC(fixup_smp)
 
+#ifdef __ARMEB_
+#define LOW_OFFSET	0x4
+#define HIGH_OFFSET	0x0
+#else
+#define LOW_OFFSET	0x0
+#define HIGH_OFFSET	0x4
+#endif
+
 #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
 
 /* __fixup_pv_table - patch the stub instructions with the delta between
@@ -546,17 +554,20 @@ ENDPROC(fixup_smp)
 	__HEAD
 __fixup_pv_table:
 	adr	r0, 1f
-	ldmia	r0, {r3-r5, r7}
-	sub	r3, r0, r3	@ PHYS_OFFSET - PAGE_OFFSET
+	ldmia	r0, {r3-r7}
+	mvn	ip, #0
+	subs	r3, r0, r3	@ PHYS_OFFSET - PAGE_OFFSET
 	add	r4, r4, r3	@ adjust table start address
 	add	r5, r5, r3	@ adjust table end address
-	add	r7, r7, r3	@ adjust __pv_phys_offset address
-	str	r8, [r7]	@ save computed PHYS_OFFSET to __pv_phys_offset
+	add	r6, r6, r3	@ adjust __pv_phys_offset address
+	add	r7, r7, r3	@ adjust __pv_offset address
+	str	r8, [r6, #LOW_OFFSET]	@ save computed PHYS_OFFSET to __pv_phys_offset
+	strcc	ip, [r7, #HIGH_OFFSET]	@ save to __pv_offset high bits
 	mov	r6, r3, lsr #24	@ constant for add/sub instructions
 	teq	r3, r6, lsl #24	@ must be 16MiB aligned
 THUMB(	it	ne		@ cross section branch )
 	bne	__error
-	str	r6, [r7, #4]	@ save to __pv_offset
+	str	r3, [r7, #LOW_OFFSET]	@ save to __pv_offset low bits
 	b	__fixup_a_pv_table
 ENDPROC(__fixup_pv_table)
 
@@ -565,10 +576,19 @@ ENDPROC(__fixup_pv_table)
 	.long	__pv_table_begin
 	.long	__pv_table_end
 2:	.long	__pv_phys_offset
+	.long	__pv_offset
 
 	.text
 __fixup_a_pv_table:
+	adr	r0, 3f
+	ldr	r6, [r0]
+	add	r6, r6, r3
+	ldr	r0, [r6, #HIGH_OFFSET]	@ pv_offset high word
+	ldr	r6, [r6, #LOW_OFFSET]	@ pv_offset low word
+	mov	r6, r6, lsr #24
+	cmn	r0, #1
 #ifdef CONFIG_THUMB2_KERNEL
+	moveq	r0, #0x200000	@ set bit 21, mov to mvn instruction
 	lsls	r6, #24
 	beq	2f
 	clz	r7, r6
@@ -582,18 +602,28 @@ __fixup_a_pv_table:
 	b	2f
 1:	add	r7, r3
 	ldrh	ip, [r7, #2]
-	and	ip, 0x8f00
-	orr	ip, r6	@ mask in offset bits 31-24
+	tst	ip, #0x4000
+	and	ip, #0x8f00
+	orrne	ip, r6	@ mask in offset bits 31-24
+	orreq	ip, r0	@ mask in offset bits 7-0
 	strh	ip, [r7, #2]
+	ldrheq	ip, [r7]
+	biceq	ip, #0x20
+	orreq	ip, ip, r0, lsr #16
+	strheq	ip, [r7]
 2:	cmp	r4, r5
 	ldrcc	r7, [r4], #4	@ use branch for delay slot
 	bcc	1b
 	bx	lr
 #else
+	moveq	r0, #0x400000	@ set bit 22, mov to mvn instruction
 	b	2f
 1:	ldr	ip, [r7, r3]
 	bic	ip, ip, #0x000000ff
-	orr	ip, ip, r6	@ mask in offset bits 31-24
+	tst	ip, #0xf00	@ check the rotation field
+	orrne	ip, ip, r6	@ mask in offset bits 31-24
+	biceq	ip, ip, #0x400000	@ clear bit 22
+	orreq	ip, ip, r0	@ mask in offset bits 7-0
 	str	ip, [r7, r3]
 2:	cmp	r4, r5
 	ldrcc	r7, [r4], #4	@ use branch for delay slot
@@ -602,28 +632,29 @@ __fixup_a_pv_table:
 #endif
 ENDPROC(__fixup_a_pv_table)
 
+3:	.long __pv_offset
+
 ENTRY(fixup_pv_table)
 	stmfd	sp!, {r4 - r7, lr}
-	ldr	r2, 2f			@ get address of __pv_phys_offset
 	mov	r3, #0			@ no offset
 	mov	r4, r0			@ r0 = table start
 	add	r5, r0, r1		@ r1 = table size
-	ldr	r6, [r2, #4]		@ get __pv_offset
 	bl	__fixup_a_pv_table
 	ldmfd	sp!, {r4 - r7, pc}
 ENDPROC(fixup_pv_table)
 
-	.align
-2:	.long	__pv_phys_offset
-
 	.data
 	.globl	__pv_phys_offset
 	.type	__pv_phys_offset, %object
 __pv_phys_offset:
-	.long	0
-	.size	__pv_phys_offset, . - __pv_phys_offset
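
The ARM-mode loop in the hunk above tells the add/sub stubs apart from the mov stub by the rotation field of the immediate. A rough C rendering of that decision follows, under the assumption that the add/sub stubs are emitted with a rotated 8-bit immediate (non-zero bits 11:8) while the mov stub uses an unrotated one; the helper name and the example encodings are invented for this sketch and are not part of the patch.

	#include <stdint.h>
	#include <stdio.h>

	/* Sketch of the ARM (non-Thumb) patching step: clear the 8-bit
	 * immediate, then either insert offset bits 31:24 (add/sub stub) or
	 * patch the high-word constant, turning mov into mvn #0 when the
	 * 64-bit __pv_offset is negative. */
	static uint32_t patch_stub(uint32_t insn, uint32_t off_lo, uint32_t off_hi)
	{
		insn &= ~0xffu;				/* clear the 8-bit immediate */
		if (insn & 0xf00) {			/* rotation set: add/sub stub */
			insn |= off_lo >> 24;		/* offset bits 31:24 */
		} else {				/* no rotation: the mov stub */
			insn &= ~0x400000u;		/* start from a plain mov */
			if (off_hi == 0xffffffffu)
				insn |= 0x400000u;	/* negative offset: mov -> mvn #0 */
			else
				insn |= off_hi;		/* positive offset: mov #high_bits */
		}
		return insn;
	}

	int main(void)
	{
		/* Example encodings: add r0, r1, #0x81000000 and mov r0, #0x81 */
		uint32_t add_stub = 0xe2810481, mov_stub = 0xe3a00081;

		/* pv_offset = 0xffffffff_c0000000 (PA 0x80000000, VA 0xc0000000) */
		printf("%#x %#x\n", patch_stub(add_stub, 0xc0000000, 0xffffffff),
				    patch_stub(mov_stub, 0xc0000000, 0xffffffff));
		/* pv_offset = 0x1_c0000000 (PA 0x2_80000000, VA 0xc0000000) */
		printf("%#x %#x\n", patch_stub(add_stub, 0xc0000000, 0x1),
				    patch_stub(mov_stub, 0xc0000000, 0x1));
		return 0;
	}

In the first case the mov stub becomes mvn r0, #0; in the second it becomes mov r0, #1, mirroring the moveq/biceq/orreq sequence in the diff.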
+	.quad	0
+	.size	__pv_phys_offset, . -__pv_phys_offset
+
+	.globl	__pv_offset
+	.type	__pv_offset, %object
 __pv_offset:
-	.long	0
+	.quad	0
+	.size	__pv_offset, . -__pv_offset
 #endif
 
 #include "head-common.S"

-- cgit v1.1

From 2f9bf9beddb1649485b47302a5aba9761cbc9084 Mon Sep 17 00:00:00 2001
From: Ben Dooks
Date: Fri, 1 Feb 2013 16:23:08 +0100
Subject: ARM: fixup_pv_table bug when CPU_ENDIAN_BE8

The fixup_pv_table assumes that the instructions are in the same
endian configuration as the data, but when the CPU is running in BE8
the instructions stay in little-endian format.

Make sure, if CONFIG_CPU_ENDIAN_BE8 is set, that we do all the
alterations to the instructions taking into account that LDR/STR will
be swapping the data endianness.

Since the code is only modifying a byte, we avoid dual-swapping the
data and just change the bits we clear and ORR in (in the case where
the code is not Thumb-2).

For Thumb-2, we add the necessary rev16 instructions to ensure that
the instructions are processed in the correct format, as it was easier
than re-writing the code to contain a mask and shift.

Signed-off-by: Ben Dooks
Reviewed-by: Dave Martin
Tested-by: Thomas Petazzoni
---
 arch/arm/kernel/head.S | 8 ++++++++
 1 file changed, 8 insertions(+)
(limited to 'arch/arm/kernel/head.S')

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 2c7cc1e..9e5906c 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -582,8 +582,10 @@ __fixup_a_pv_table:
 	b	2f
 1:	add	r7, r3
 	ldrh	ip, [r7, #2]
+ARM_BE8(rev16	ip, ip)
 	and	ip, 0x8f00
 	orr	ip, r6	@ mask in offset bits 31-24
+ARM_BE8(rev16	ip, ip)
 	strh	ip, [r7, #2]
 2:	cmp	r4, r5
 	ldrcc	r7, [r4], #4	@ use branch for delay slot
@@ -592,8 +594,14 @@ __fixup_a_pv_table:
 #else
 	b	2f
 1:	ldr	ip, [r7, r3]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	@ in BE8, we load data in BE, but instructions still in LE
+	bic	ip, ip, #0xff000000
+	orr	ip, ip, r6, lsl#24
+#else
 	bic	ip, ip, #0x000000ff
 	orr	ip, ip, r6	@ mask in offset bits 31-24
+#endif
 	str	ip, [r7, r3]
 2:	cmp	r4, r5
 	ldrcc	r7, [r4], #4	@ use branch for delay slot

-- cgit v1.1
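
A short C sketch of the non-Thumb BE8 case handled above (editorial illustration only; the helper name and example encoding are invented): the instruction stream stays little-endian, so a word fetched with a big-endian ldr has the encoding's low byte, the stub's 8-bit immediate, in bits 31:24 of the value the CPU sees, and those are the bits to patch.

	#include <stdint.h>
	#include <stdio.h>

	/* Mirror of the BE8 branch: clear and refill the top byte of the
	 * loaded word rather than the bottom byte. */
	static uint32_t patch_imm8_be8(uint32_t loaded_word, uint32_t imm8)
	{
		loaded_word &= ~0xff000000u;	/* bic ip, ip, #0xff000000 */
		loaded_word |= imm8 << 24;	/* orr ip, ip, r6, lsl#24 */
		return loaded_word;
	}

	int main(void)
	{
		/* 0xe24f0008 (sub r0, pc, #8) as seen by a BE8 ldr: 0x08004fe2 */
		printf("%#x\n", patch_imm8_be8(0x08004fe2, 0x81));
		return 0;
	}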
From 97bcb0fea590d3d704f985bec08f342d28992634 Mon Sep 17 00:00:00 2001
From: Ben Dooks
Date: Fri, 1 Feb 2013 09:40:42 +0000
Subject: ARM: set BE8 if LE in head code

If we are booting in LE and compiled for BE8, then add code to set the
state to BE8. Since the instruction stream is always LE, we do not need
to do anything special to the instruction.

Also ensure that the secondary processors are started in the same mode.

Note, we do add about 20 bytes to the kernel image, but it seems easier
to do this than adding another configuration to change.

Signed-off-by: Ben Dooks
Reviewed-by: Dave Martin
Tested-by: Thomas Petazzoni
---
 arch/arm/kernel/head.S | 4 ++++
 1 file changed, 4 insertions(+)
(limited to 'arch/arm/kernel/head.S')

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 9e5906c..a047acf 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -77,6 +77,7 @@
 
 	__HEAD
 ENTRY(stext)
+ ARM_BE8(setend	be )			@ ensure we are in BE8 mode
 
 THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is always entered in ARM.
 THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
@@ -352,6 +353,9 @@ ENTRY(secondary_startup)
 	 * the processor type - there is no need to check the machine type
 	 * as it has already been validated by the primary processor.
 	 */
+
+ ARM_BE8(setend	be)			@ ensure we are in BE8 mode
+
 #ifdef CONFIG_ARM_VIRT_EXT
 	bl	__hyp_stub_install_secondary
 #endif

-- cgit v1.1

From 830fd4d6de1785942e34babe0a8984f72b534b25 Mon Sep 17 00:00:00 2001
From: Sricharan R
Date: Tue, 29 Oct 2013 07:29:56 +0100
Subject: ARM: 7870/1: head: Fix the missing underscore in __ARMEB__ macro and .align keyword

Commit 'f52bb722547f43caeaecbcc62db9f3c3b80ead9b'
Author: Sricharan R
   ARM: mm: Correct virt_to_phys patching for 64 bit physical addresses

introduced a __ARMEB__ macro usage in a new place, but missed the
second underscore. So correcting it here.

Also an explicit .align keyword is needed for the label with the .long
data type to be aligned on a 4-byte boundary. Otherwise this can cause
problems for a Thumb-2 build. So adding it here.

Signed-off-by: Sricharan R
Signed-off-by: Russell King
---
 arch/arm/kernel/head.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
(limited to 'arch/arm/kernel/head.S')

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 5454794..32402ba 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -536,7 +536,7 @@ ENTRY(fixup_smp)
 	ldmfd	sp!, {r4 - r6, pc}
 ENDPROC(fixup_smp)
 
-#ifdef __ARMEB_
+#ifdef __ARMEB__
 #define LOW_OFFSET	0x4
 #define HIGH_OFFSET	0x0
 #else
@@ -632,6 +632,7 @@ __fixup_a_pv_table:
 #endif
 ENDPROC(__fixup_a_pv_table)
 
+	.align
 3:	.long __pv_offset
 
 ENTRY(fixup_pv_table)

-- cgit v1.1
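
The corrected __ARMEB__ test above governs the LOW_OFFSET/HIGH_OFFSET values used to address the two halves of the 64-bit __pv_offset. A small host-side C sketch of why the low word sits at byte offset 4 on a big-endian layout and at offset 0 on little-endian (editorial illustration only, not kernel code):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* A .quad __pv_offset is accessed 32 bits at a time with byte
	 * offsets LOW_OFFSET/HIGH_OFFSET, which is why the #ifdef __ARMEB__
	 * block swaps the two defines. */
	int main(void)
	{
		uint64_t pv_offset = 0x00000001c0000000ULL;	/* example 2 from the first commit */
		uint8_t bytes[8];
		uint32_t at0, at4;

		memcpy(bytes, &pv_offset, sizeof(bytes));	/* host byte order */
		memcpy(&at0, bytes + 0, sizeof(at0));
		memcpy(&at4, bytes + 4, sizeof(at4));

		/* On a little-endian host the low word is at offset 0;
		 * on a big-endian host it would be at offset 4. */
		printf("word at offset 0: %#x\nword at offset 4: %#x\n", at0, at4);
		return 0;
	}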