summaryrefslogtreecommitdiffstats
path: root/arch/microblaze/lib/uaccess_old.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/microblaze/lib/uaccess_old.S')
-rw-r--r--arch/microblaze/lib/uaccess_old.S123
1 files changed, 106 insertions, 17 deletions
diff --git a/arch/microblaze/lib/uaccess_old.S b/arch/microblaze/lib/uaccess_old.S
index 5810cec..f037266 100644
--- a/arch/microblaze/lib/uaccess_old.S
+++ b/arch/microblaze/lib/uaccess_old.S
@@ -10,6 +10,7 @@
#include <linux/errno.h>
#include <linux/linkage.h>
+#include <asm/page.h>
/*
* int __strncpy_user(char *to, char *from, int len);
@@ -33,8 +34,8 @@ __strncpy_user:
* r3 - temp count
* r4 - temp val
*/
+ beqid r7,3f
addik r3,r7,0 /* temp_count = len */
- beqi r3,3f
1:
lbu r4,r6,r0
sb r4,r5,r0
@@ -76,8 +77,8 @@ __strncpy_user:
.type __strnlen_user, @function
.align 4;
__strnlen_user:
+ beqid r6,3f
addik r3,r6,0
- beqi r3,3f
1:
lbu r4,r5,r0
beqid r4,2f /* break on NUL */
@@ -102,6 +103,49 @@ __strnlen_user:
.section __ex_table,"a"
.word 1b,4b
+/* Loop unrolling for __copy_tofrom_user */
+#define COPY(offset) \
+1: lwi r4 , r6, 0x0000 + offset; \
+2: lwi r19, r6, 0x0004 + offset; \
+3: lwi r20, r6, 0x0008 + offset; \
+4: lwi r21, r6, 0x000C + offset; \
+5: lwi r22, r6, 0x0010 + offset; \
+6: lwi r23, r6, 0x0014 + offset; \
+7: lwi r24, r6, 0x0018 + offset; \
+8: lwi r25, r6, 0x001C + offset; \
+9: swi r4 , r5, 0x0000 + offset; \
+10: swi r19, r5, 0x0004 + offset; \
+11: swi r20, r5, 0x0008 + offset; \
+12: swi r21, r5, 0x000C + offset; \
+13: swi r22, r5, 0x0010 + offset; \
+14: swi r23, r5, 0x0014 + offset; \
+15: swi r24, r5, 0x0018 + offset; \
+16: swi r25, r5, 0x001C + offset; \
+ .section __ex_table,"a"; \
+ .word 1b, 0f; \
+ .word 2b, 0f; \
+ .word 3b, 0f; \
+ .word 4b, 0f; \
+ .word 5b, 0f; \
+ .word 6b, 0f; \
+ .word 7b, 0f; \
+ .word 8b, 0f; \
+ .word 9b, 0f; \
+ .word 10b, 0f; \
+ .word 11b, 0f; \
+ .word 12b, 0f; \
+ .word 13b, 0f; \
+ .word 14b, 0f; \
+ .word 15b, 0f; \
+ .word 16b, 0f; \
+ .text
+
+#define COPY_80(offset) \
+ COPY(0x00 + offset);\
+ COPY(0x20 + offset);\
+ COPY(0x40 + offset);\
+ COPY(0x60 + offset);
+
/*
* int __copy_tofrom_user(char *to, char *from, int len)
* Return:
@@ -119,34 +163,79 @@ __copy_tofrom_user:
* r7, r3 - count
* r4 - tempval
*/
- beqid r7, 3f /* zero size is not likely */
- andi r3, r7, 0x3 /* filter add count */
- bneid r3, 4f /* if is odd value then byte copying */
+ beqid r7, 0f /* zero size is not likely */
or r3, r5, r6 /* find if is any to/from unaligned */
- andi r3, r3, 0x3 /* mask unaligned */
- bneid r3, 1f /* it is unaligned -> then jump */
+ or r3, r3, r7 /* find if count is unaligned */
+ andi r3, r3, 0x3 /* mask last 3 bits */
+ bneid r3, bu1 /* if r3 is not zero then byte copying */
+ or r3, r0, r0
+
+ rsubi r3, r7, PAGE_SIZE /* detect PAGE_SIZE */
+ beqid r3, page;
or r3, r0, r0
-/* at least one 4 byte copy */
-5: lw r4, r6, r3
-6: sw r4, r5, r3
+w1: lw r4, r6, r3 /* at least one 4 byte copy */
+w2: sw r4, r5, r3
addik r7, r7, -4
- bneid r7, 5b
+ bneid r7, w1
addik r3, r3, 4
addik r3, r7, 0
rtsd r15, 8
nop
-4: or r3, r0, r0
-1: lbu r4,r6,r3
-2: sb r4,r5,r3
+
+ .section __ex_table,"a"
+ .word w1, 0f;
+ .word w2, 0f;
+ .text
+
+.align 4 /* Alignment is important to keep icache happy */
+page: /* Create room on stack and save registers for storign values */
+ addik r1, r1, -32
+ swi r19, r1, 4
+ swi r20, r1, 8
+ swi r21, r1, 12
+ swi r22, r1, 16
+ swi r23, r1, 20
+ swi r24, r1, 24
+ swi r25, r1, 28
+loop: /* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */
+ /* Loop unrolling to get performance boost */
+ COPY_80(0x000);
+ COPY_80(0x080);
+ COPY_80(0x100);
+ COPY_80(0x180);
+ /* copy loop */
+ addik r6, r6, 0x200
+ addik r7, r7, -0x200
+ bneid r7, loop
+ addik r5, r5, 0x200
+ /* Restore register content */
+ lwi r19, r1, 4
+ lwi r20, r1, 8
+ lwi r21, r1, 12
+ lwi r22, r1, 16
+ lwi r23, r1, 20
+ lwi r24, r1, 24
+ lwi r25, r1, 28
+ addik r1, r1, 32
+ /* return back */
+ addik r3, r7, 0
+ rtsd r15, 8
+ nop
+
+.align 4 /* Alignment is important to keep icache happy */
+bu1: lbu r4,r6,r3
+bu2: sb r4,r5,r3
addik r7,r7,-1
- bneid r7,1b
+ bneid r7,bu1
addik r3,r3,1 /* delay slot */
-3:
+0:
addik r3,r7,0
rtsd r15,8
nop
.size __copy_tofrom_user, . - __copy_tofrom_user
.section __ex_table,"a"
- .word 1b,3b,2b,3b,5b,3b,6b,3b
+ .word bu1, 0b;
+ .word bu2, 0b;
+ .text
OpenPOWER on IntegriCloud