summaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
authorChristophe Leroy <christophe.leroy@c-s.fr>2017-08-23 16:54:32 +0200
committerMichael Ellerman <mpe@ellerman.id.au>2017-09-01 16:42:45 +1000
commitda74f659205ea08cb0fd0b3050637b0e0eb31520 (patch)
treef0303ef115eb0acc4c2139b978afdc79f4cf6a90 /arch/powerpc
parent45f62159f3aafe27e2df45a3ec83b32ca7304410 (diff)
downloadop-kernel-dev-da74f659205ea08cb0fd0b3050637b0e0eb31520.zip
op-kernel-dev-da74f659205ea08cb0fd0b3050637b0e0eb31520.tar.gz
powerpc/32: add memset16()
Commit 694fc88ce271f ("powerpc/string: Implement optimized memset variants") added memset16(), memset32() and memset64() for the 64 bits PPC. On 32 bits, memset64() is not relevant, and as shown below, the generic version of memset32() gives a good code, so only memset16() is candidate for an optimised version. 000009c0 <memset32>: 9c0: 2c 05 00 00 cmpwi r5,0 9c4: 39 23 ff fc addi r9,r3,-4 9c8: 4d 82 00 20 beqlr 9cc: 7c a9 03 a6 mtctr r5 9d0: 94 89 00 04 stwu r4,4(r9) 9d4: 42 00 ff fc bdnz 9d0 <memset32+0x10> 9d8: 4e 80 00 20 blr The last part of memset() handling the not 4-bytes multiples operates on bytes, making it unsuitable for handling word without modification. As it would increase memset() complexity, it is better to implement memset16() from scratch. In addition it has the advantage of allowing a more optimised memset16() than what we would have by using the memset() function. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/include/asm/string.h4
-rw-r--r--arch/powerpc/lib/copy_32.S14
2 files changed, 17 insertions, 1 deletions
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index b0e8246..cc9adde 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -10,6 +10,7 @@
#define __HAVE_ARCH_MEMMOVE
#define __HAVE_ARCH_MEMCMP
#define __HAVE_ARCH_MEMCHR
+#define __HAVE_ARCH_MEMSET16
extern char * strcpy(char *,const char *);
extern char * strncpy(char *,const char *, __kernel_size_t);
@@ -24,7 +25,6 @@ extern int memcmp(const void *,const void *,__kernel_size_t);
extern void * memchr(const void *,int,__kernel_size_t);
#ifdef CONFIG_PPC64
-#define __HAVE_ARCH_MEMSET16
#define __HAVE_ARCH_MEMSET32
#define __HAVE_ARCH_MEMSET64
@@ -46,6 +46,8 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
{
return __memset64(p, v, n * 8);
}
+#else
+extern void *memset16(uint16_t *, uint16_t, __kernel_size_t);
#endif
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 8aedbb5..a14d4df 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -67,6 +67,20 @@ CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)
+_GLOBAL(memset16)
+ rlwinm. r0 ,r5, 31, 1, 31
+ addi r6, r3, -4
+ beq- 2f
+ rlwimi r4 ,r4 ,16 ,0 ,15
+ mtctr r0
+1: stwu r4, 4(r6)
+ bdnz 1b
+2: andi. r0, r5, 1
+ beqlr
+ sth r4, 4(r6)
+ blr
+EXPORT_SYMBOL(memset16)
+
/*
* Use dcbz on the complete cache lines in the destination
* to set them to zero. This requires that the destination
OpenPOWER on IntegriCloud