diff options
author | Philippe Bergheaud <felix@linux.vnet.ibm.com> | 2014-04-30 09:12:01 +1000 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2014-04-30 15:26:18 +1000 |
commit | 00f554fadebb96877ad449758dc90303a9826afe (patch) | |
tree | 77a9ff9bfbb32f75a18a7f3032ad7d3d55944c48 /arch/powerpc/lib/memcpy_64.S | |
parent | 48ce3b7cc66b31cd8ae1b8155a82426308f71491 (diff) | |
download | op-kernel-dev-00f554fadebb96877ad449758dc90303a9826afe.zip op-kernel-dev-00f554fadebb96877ad449758dc90303a9826afe.tar.gz |
powerpc: memcpy optimization for 64bit LE
Unaligned stores take alignment exceptions on POWER7 running in little-endian mode.
This is a dumb little-endian base memcpy that prevents unaligned stores.
Once booted, the feature fixup code switches over to the VMX copy loops
(which are already endian-safe).
The question is what we do before that switchover. The base 64-bit
memcpy takes alignment exceptions on POWER7, so we can't use it as is.
Fixing the causes of the alignment exceptions would slow it down, because
we'd need to ensure all loads and stores are aligned either through
rotate tricks or bytewise loads and stores. Either would be bad for
all other 64bit platforms.
[ I simplified the loop a bit - Anton ]
Signed-off-by: Philippe Bergheaud <felix@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/lib/memcpy_64.S')
-rw-r--r-- | arch/powerpc/lib/memcpy_64.S | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 72ad055..dc4ba79 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -12,12 +12,27 @@
 	.align 7
 _GLOBAL(memcpy)
 BEGIN_FTR_SECTION
+#ifdef __LITTLE_ENDIAN__
+	cmpdi cr7,r5,0
+#else
 	std r3,48(r1) /* save destination pointer for return value */
+#endif
 FTR_SECTION_ELSE
 #ifndef SELFTEST
 	b memcpy_power7
 #endif
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#ifdef __LITTLE_ENDIAN__
+	/* dumb little-endian memcpy that will get replaced at runtime */
+	addi r9,r3,-1
+	addi r4,r4,-1
+	beqlr cr7
+	mtctr r5
+1:	lbzu r10,1(r4)
+	stbu r10,1(r9)
+	bdnz 1b
+	blr
+#else
 	PPC_MTOCRF(0x01,r5)
 	cmpldi cr1,r5,16
 	neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
@@ -203,3 +218,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 	stb r0,0(r3)
 4:	ld r3,48(r1) /* return dest pointer */
 	blr
+#endif