From 14cf11af6cf608eb8c23e989ddb17a715ddce109 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 26 Sep 2005 16:04:21 +1000 Subject: powerpc: Merge enough to start building in arch/powerpc. This creates the directory structure under arch/powerpc and a bunch of Kconfig files. It does a first-cut merge of arch/powerpc/mm, arch/powerpc/lib and arch/powerpc/platforms/powermac. This is enough to build a 32-bit powermac kernel with ARCH=powerpc. For now we are getting some unmerged files from arch/ppc/kernel and arch/ppc/syslib, or arch/ppc64/kernel. This makes some minor changes to files in those directories and files outside arch/powerpc. The boot directory is still not merged. That's going to be interesting. Signed-off-by: Paul Mackerras --- arch/powerpc/lib/Makefile | 9 + arch/powerpc/lib/checksum.S | 225 ++++++++++++++ arch/powerpc/lib/checksum64.S | 229 ++++++++++++++ arch/powerpc/lib/copy32.S | 543 +++++++++++++++++++++++++++++++++ arch/powerpc/lib/copypage.S | 121 ++++++++ arch/powerpc/lib/copyuser.S | 576 +++++++++++++++++++++++++++++++++++ arch/powerpc/lib/div64.S | 58 ++++ arch/powerpc/lib/e2a.c | 108 +++++++ arch/powerpc/lib/memcpy.S | 172 +++++++++++ arch/powerpc/lib/rheap.c | 693 ++++++++++++++++++++++++++++++++++++++++++ arch/powerpc/lib/sstep.c | 141 +++++++++ arch/powerpc/lib/strcase.c | 23 ++ arch/powerpc/lib/string.S | 203 +++++++++++++ arch/powerpc/lib/usercopy.c | 41 +++ 14 files changed, 3142 insertions(+) create mode 100644 arch/powerpc/lib/Makefile create mode 100644 arch/powerpc/lib/checksum.S create mode 100644 arch/powerpc/lib/checksum64.S create mode 100644 arch/powerpc/lib/copy32.S create mode 100644 arch/powerpc/lib/copypage.S create mode 100644 arch/powerpc/lib/copyuser.S create mode 100644 arch/powerpc/lib/div64.S create mode 100644 arch/powerpc/lib/e2a.c create mode 100644 arch/powerpc/lib/memcpy.S create mode 100644 arch/powerpc/lib/rheap.c create mode 100644 arch/powerpc/lib/sstep.c create mode 100644 arch/powerpc/lib/strcase.c create mode 100644 arch/powerpc/lib/string.S create mode 100644 arch/powerpc/lib/usercopy.c (limited to 'arch/powerpc/lib') diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile new file mode 100644 index 0000000..347f979 --- /dev/null +++ b/arch/powerpc/lib/Makefile @@ -0,0 +1,9 @@ +# +# Makefile for ppc-specific library files.. +# + +obj-y := strcase.o string.o +obj-$(CONFIG_PPC32) += div64.o copy32.o checksum.o +obj-$(CONFIG_PPC64) += copypage.o copyuser.o memcpy.o usercopy.o \ + sstep.o checksum64.o +obj-$(CONFIG_PPC_ISERIES) += e2a.o diff --git a/arch/powerpc/lib/checksum.S b/arch/powerpc/lib/checksum.S new file mode 100644 index 0000000..7874e8a --- /dev/null +++ b/arch/powerpc/lib/checksum.S @@ -0,0 +1,225 @@ +/* + * This file contains assembly-language implementations + * of IP-style 1's complement checksum routines. + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). + */ + +#include +#include +#include +#include + + .text + +/* + * ip_fast_csum(buf, len) -- Optimized for IP header + * len is in words and is always >= 5. + */ +_GLOBAL(ip_fast_csum) + lwz r0,0(r3) + lwzu r5,4(r3) + addic. 
r4,r4,-2 + addc r0,r0,r5 + mtctr r4 + blelr- +1: lwzu r4,4(r3) + adde r0,r0,r4 + bdnz 1b + addze r0,r0 /* add in final carry */ + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Compute checksum of TCP or UDP pseudo-header: + * csum_tcpudp_magic(saddr, daddr, len, proto, sum) + */ +_GLOBAL(csum_tcpudp_magic) + rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ + addc r0,r3,r4 /* add 4 32-bit words together */ + adde r0,r0,r5 + adde r0,r0,r7 + addze r0,r0 /* add in final carry */ + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * csum_partial(buff, len, sum) + */ +_GLOBAL(csum_partial) + addic r0,r5,0 + subi r3,r3,4 + srwi. r6,r4,2 + beq 3f /* if we're doing < 4 bytes */ + andi. r5,r3,2 /* Align buffer to longword boundary */ + beq+ 1f + lhz r5,4(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r4,r4,2 + addc r0,r0,r5 + srwi. r6,r4,2 /* # words to do */ + beq 3f +1: mtctr r6 +2: lwzu r5,4(r3) /* the bdnz has zero overhead, so it should */ + adde r0,r0,r5 /* be unnecessary to unroll this loop */ + bdnz 2b + andi. r4,r4,3 +3: cmpwi 0,r4,2 + blt+ 4f + lhz r5,4(r3) + addi r3,r3,2 + subi r4,r4,2 + adde r0,r0,r5 +4: cmpwi 0,r4,1 + bne+ 5f + lbz r5,4(r3) + slwi r5,r5,8 /* Upper byte of word */ + adde r0,r0,r5 +5: addze r3,r0 /* add in final carry */ + blr + +/* + * Computes the checksum of a memory block at src, length len, + * and adds in "sum" (32-bit), while copying the block to dst. + * If an access exception occurs on src or dst, it stores -EFAULT + * to *src_err or *dst_err respectively, and (for an error on + * src) zeroes the rest of dst. + * + * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err) + */ +_GLOBAL(csum_partial_copy_generic) + addic r0,r6,0 + subi r3,r3,4 + subi r4,r4,4 + srwi. r6,r5,2 + beq 3f /* if we're doing < 4 bytes */ + andi. r9,r4,2 /* Align dst to longword boundary */ + beq+ 1f +81: lhz r6,4(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r5,r5,2 +91: sth r6,4(r4) + addi r4,r4,2 + addc r0,r0,r6 + srwi. r6,r5,2 /* # words to do */ + beq 3f +1: srwi. r6,r5,4 /* # groups of 4 words to do */ + beq 10f + mtctr r6 +71: lwz r6,4(r3) +72: lwz r9,8(r3) +73: lwz r10,12(r3) +74: lwzu r11,16(r3) + adde r0,r0,r6 +75: stw r6,4(r4) + adde r0,r0,r9 +76: stw r9,8(r4) + adde r0,r0,r10 +77: stw r10,12(r4) + adde r0,r0,r11 +78: stwu r11,16(r4) + bdnz 71b +10: rlwinm. r6,r5,30,30,31 /* # words left to do */ + beq 13f + mtctr r6 +82: lwzu r9,4(r3) +92: stwu r9,4(r4) + adde r0,r0,r9 + bdnz 82b +13: andi. r5,r5,3 +3: cmpwi 0,r5,2 + blt+ 4f +83: lhz r6,4(r3) + addi r3,r3,2 + subi r5,r5,2 +93: sth r6,4(r4) + addi r4,r4,2 + adde r0,r0,r6 +4: cmpwi 0,r5,1 + bne+ 5f +84: lbz r6,4(r3) +94: stb r6,4(r4) + slwi r6,r6,8 /* Upper byte of word */ + adde r0,r0,r6 +5: addze r3,r0 /* add in final carry */ + blr + +/* These shouldn't go in the fixup section, since that would + cause the ex_table addresses to get out of order. */ + +src_error_4: + mfctr r6 /* update # bytes remaining from ctr */ + rlwimi r5,r6,4,0,27 + b 79f +src_error_1: + li r6,0 + subi r5,r5,2 +95: sth r6,4(r4) + addi r4,r4,2 +79: srwi. r6,r5,2 + beq 3f + mtctr r6 +src_error_2: + li r6,0 +96: stwu r6,4(r4) + bdnz 96b +3: andi. 
r5,r5,3 + beq src_error +src_error_3: + li r6,0 + mtctr r5 + addi r4,r4,3 +97: stbu r6,1(r4) + bdnz 97b +src_error: + cmpwi 0,r7,0 + beq 1f + li r6,-EFAULT + stw r6,0(r7) +1: addze r3,r0 + blr + +dst_error: + cmpwi 0,r8,0 + beq 1f + li r6,-EFAULT + stw r6,0(r8) +1: addze r3,r0 + blr + +.section __ex_table,"a" + .long 81b,src_error_1 + .long 91b,dst_error + .long 71b,src_error_4 + .long 72b,src_error_4 + .long 73b,src_error_4 + .long 74b,src_error_4 + .long 75b,dst_error + .long 76b,dst_error + .long 77b,dst_error + .long 78b,dst_error + .long 82b,src_error_2 + .long 92b,dst_error + .long 83b,src_error_3 + .long 93b,dst_error + .long 84b,src_error_3 + .long 94b,dst_error + .long 95b,dst_error + .long 96b,dst_error + .long 97b,dst_error diff --git a/arch/powerpc/lib/checksum64.S b/arch/powerpc/lib/checksum64.S new file mode 100644 index 0000000..ef96c6c --- /dev/null +++ b/arch/powerpc/lib/checksum64.S @@ -0,0 +1,229 @@ +/* + * This file contains assembly-language implementations + * of IP-style 1's complement checksum routines. + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). + */ + +#include +#include +#include +#include + +/* + * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header + * len is in words and is always >= 5. + * + * In practice len == 5, but this is not guaranteed. So this code does not + * attempt to use doubleword instructions. + */ +_GLOBAL(ip_fast_csum) + lwz r0,0(r3) + lwzu r5,4(r3) + addic. r4,r4,-2 + addc r0,r0,r5 + mtctr r4 + blelr- +1: lwzu r4,4(r3) + adde r0,r0,r4 + bdnz 1b + addze r0,r0 /* add in final carry */ + rldicl r4,r0,32,0 /* fold two 32-bit halves together */ + add r0,r0,r4 + srdi r0,r0,32 + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Compute checksum of TCP or UDP pseudo-header: + * csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum) + * No real gain trying to do this specially for 64 bit, but + * the 32 bit addition may spill into the upper bits of + * the doubleword so we still must fold it down from 64. + */ +_GLOBAL(csum_tcpudp_magic) + rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ + addc r0,r3,r4 /* add 4 32-bit words together */ + adde r0,r0,r5 + adde r0,r0,r7 + rldicl r4,r0,32,0 /* fold 64 bit value */ + add r0,r4,r0 + srdi r0,r0,32 + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit). + * + * This code assumes at least halfword alignment, though the length + * can be any number of bytes. The sum is accumulated in r5. + * + * csum_partial(r3=buff, r4=len, r5=sum) + */ +_GLOBAL(csum_partial) + subi r3,r3,8 /* we'll offset by 8 for the loads */ + srdi. r6,r4,3 /* divide by 8 for doubleword count */ + addic r5,r5,0 /* clear carry */ + beq 3f /* if we're doing < 8 bytes */ + andi. r0,r3,2 /* aligned on a word boundary already? */ + beq+ 1f + lhz r6,8(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r4,r4,2 + addc r5,r5,r6 + srdi. r6,r4,3 /* recompute number of doublewords */ + beq 3f /* any left? 
*/ +1: mtctr r6 +2: ldu r6,8(r3) /* main sum loop */ + adde r5,r5,r6 + bdnz 2b + andi. r4,r4,7 /* compute bytes left to sum after doublewords */ +3: cmpwi 0,r4,4 /* is at least a full word left? */ + blt 4f + lwz r6,8(r3) /* sum this word */ + addi r3,r3,4 + subi r4,r4,4 + adde r5,r5,r6 +4: cmpwi 0,r4,2 /* is at least a halfword left? */ + blt+ 5f + lhz r6,8(r3) /* sum this halfword */ + addi r3,r3,2 + subi r4,r4,2 + adde r5,r5,r6 +5: cmpwi 0,r4,1 /* is at least a byte left? */ + bne+ 6f + lbz r6,8(r3) /* sum this byte */ + slwi r6,r6,8 /* this byte is assumed to be the upper byte of a halfword */ + adde r5,r5,r6 +6: addze r5,r5 /* add in final carry */ + rldicl r4,r5,32,0 /* fold two 32-bit halves together */ + add r3,r4,r5 + srdi r3,r3,32 + blr + +/* + * Computes the checksum of a memory block at src, length len, + * and adds in "sum" (32-bit), while copying the block to dst. + * If an access exception occurs on src or dst, it stores -EFAULT + * to *src_err or *dst_err respectively, and (for an error on + * src) zeroes the rest of dst. + * + * This code needs to be reworked to take advantage of 64 bit sum+copy. + * However, due to tokenring halfword alignment problems this will be very + * tricky. For now we'll leave it until we instrument it somehow. + * + * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err) + */ +_GLOBAL(csum_partial_copy_generic) + addic r0,r6,0 + subi r3,r3,4 + subi r4,r4,4 + srwi. r6,r5,2 + beq 3f /* if we're doing < 4 bytes */ + andi. r9,r4,2 /* Align dst to longword boundary */ + beq+ 1f +81: lhz r6,4(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r5,r5,2 +91: sth r6,4(r4) + addi r4,r4,2 + addc r0,r0,r6 + srwi. r6,r5,2 /* # words to do */ + beq 3f +1: mtctr r6 +82: lwzu r6,4(r3) /* the bdnz has zero overhead, so it should */ +92: stwu r6,4(r4) /* be unnecessary to unroll this loop */ + adde r0,r0,r6 + bdnz 82b + andi. r5,r5,3 +3: cmpwi 0,r5,2 + blt+ 4f +83: lhz r6,4(r3) + addi r3,r3,2 + subi r5,r5,2 +93: sth r6,4(r4) + addi r4,r4,2 + adde r0,r0,r6 +4: cmpwi 0,r5,1 + bne+ 5f +84: lbz r6,4(r3) +94: stb r6,4(r4) + slwi r6,r6,8 /* Upper byte of word */ + adde r0,r0,r6 +5: addze r3,r0 /* add in final carry (unlikely with 64-bit regs) */ + rldicl r4,r3,32,0 /* fold 64 bit value */ + add r3,r4,r3 + srdi r3,r3,32 + blr + +/* These shouldn't go in the fixup section, since that would + cause the ex_table addresses to get out of order. */ + + .globl src_error_1 +src_error_1: + li r6,0 + subi r5,r5,2 +95: sth r6,4(r4) + addi r4,r4,2 + srwi. r6,r5,2 + beq 3f + mtctr r6 + .globl src_error_2 +src_error_2: + li r6,0 +96: stwu r6,4(r4) + bdnz 96b +3: andi. r5,r5,3 + beq src_error + .globl src_error_3 +src_error_3: + li r6,0 + mtctr r5 + addi r4,r4,3 +97: stbu r6,1(r4) + bdnz 97b + .globl src_error +src_error: + cmpdi 0,r7,0 + beq 1f + li r6,-EFAULT + stw r6,0(r7) +1: addze r3,r0 + blr + + .globl dst_error +dst_error: + cmpdi 0,r8,0 + beq 1f + li r6,-EFAULT + stw r6,0(r8) +1: addze r3,r0 + blr + +.section __ex_table,"a" + .align 3 + .llong 81b,src_error_1 + .llong 91b,dst_error + .llong 82b,src_error_2 + .llong 92b,dst_error + .llong 83b,src_error_3 + .llong 93b,dst_error + .llong 84b,src_error_3 + .llong 94b,dst_error + .llong 95b,dst_error + .llong 96b,dst_error + .llong 97b,dst_error diff --git a/arch/powerpc/lib/copy32.S b/arch/powerpc/lib/copy32.S new file mode 100644 index 0000000..420a912 --- /dev/null +++ b/arch/powerpc/lib/copy32.S @@ -0,0 +1,543 @@ +/* + * Memory copy functions for 32-bit PowerPC. 
+ * + * Copyright (C) 1996-2005 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include + +#define COPY_16_BYTES \ + lwz r7,4(r4); \ + lwz r8,8(r4); \ + lwz r9,12(r4); \ + lwzu r10,16(r4); \ + stw r7,4(r6); \ + stw r8,8(r6); \ + stw r9,12(r6); \ + stwu r10,16(r6) + +#define COPY_16_BYTES_WITHEX(n) \ +8 ## n ## 0: \ + lwz r7,4(r4); \ +8 ## n ## 1: \ + lwz r8,8(r4); \ +8 ## n ## 2: \ + lwz r9,12(r4); \ +8 ## n ## 3: \ + lwzu r10,16(r4); \ +8 ## n ## 4: \ + stw r7,4(r6); \ +8 ## n ## 5: \ + stw r8,8(r6); \ +8 ## n ## 6: \ + stw r9,12(r6); \ +8 ## n ## 7: \ + stwu r10,16(r6) + +#define COPY_16_BYTES_EXCODE(n) \ +9 ## n ## 0: \ + addi r5,r5,-(16 * n); \ + b 104f; \ +9 ## n ## 1: \ + addi r5,r5,-(16 * n); \ + b 105f; \ +.section __ex_table,"a"; \ + .align 2; \ + .long 8 ## n ## 0b,9 ## n ## 0b; \ + .long 8 ## n ## 1b,9 ## n ## 0b; \ + .long 8 ## n ## 2b,9 ## n ## 0b; \ + .long 8 ## n ## 3b,9 ## n ## 0b; \ + .long 8 ## n ## 4b,9 ## n ## 1b; \ + .long 8 ## n ## 5b,9 ## n ## 1b; \ + .long 8 ## n ## 6b,9 ## n ## 1b; \ + .long 8 ## n ## 7b,9 ## n ## 1b; \ + .text + + .text + .stabs "arch/powerpc/lib/",N_SO,0,0,0f + .stabs "copy32.S",N_SO,0,0,0f +0: + +CACHELINE_BYTES = L1_CACHE_LINE_SIZE +LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE +CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1) + +/* + * Use dcbz on the complete cache lines in the destination + * to set them to zero. This requires that the destination + * area is cacheable. -- paulus + */ +_GLOBAL(cacheable_memzero) + mr r5,r4 + li r4,0 + addi r6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwu r4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subf r6,r0,r6 + clrlwi r7,r6,32-LG_CACHELINE_BYTES + add r8,r7,r5 + srwi r9,r8,LG_CACHELINE_BYTES + addic. r9,r9,-1 /* total number of complete cachelines */ + ble 2f + xori r0,r7,CACHELINE_MASK & ~3 + srwi. r0,r0,2 + beq 3f + mtctr r0 +4: stwu r4,4(r6) + bdnz 4b +3: mtctr r9 + li r7,4 +#if !defined(CONFIG_8xx) +10: dcbz r7,r6 +#else +10: stw r4, 4(r6) + stw r4, 8(r6) + stw r4, 12(r6) + stw r4, 16(r6) +#if CACHE_LINE_SIZE >= 32 + stw r4, 20(r6) + stw r4, 24(r6) + stw r4, 28(r6) + stw r4, 32(r6) +#endif /* CACHE_LINE_SIZE */ +#endif + addi r6,r6,CACHELINE_BYTES + bdnz 10b + clrlwi r5,r8,32-LG_CACHELINE_BYTES + addi r5,r5,4 +2: srwi r0,r5,2 + mtctr r0 + bdz 6f +1: stwu r4,4(r6) + bdnz 1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r6,3 +8: stbu r4,1(r6) + bdnz 8b + blr + +_GLOBAL(memset) + rlwimi r4,r4,8,16,23 + rlwimi r4,r4,16,0,15 + addi r6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwu r4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subf r6,r0,r6 + srwi r0,r5,2 + mtctr r0 + bdz 6f +1: stwu r4,4(r6) + bdnz 1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r6,3 +8: stbu r4,1(r6) + bdnz 8b + blr + +/* + * This version uses dcbz on the complete cache lines in the + * destination area to reduce memory traffic. This requires that + * the destination area is cacheable. + * We only use this version if the source and dest don't overlap. + * -- paulus. + */ +_GLOBAL(cacheable_memcpy) + add r7,r3,r5 /* test if the src & dst overlap */ + add r8,r4,r5 + cmplw 0,r4,r7 + cmplw 1,r3,r8 + crand 0,0,4 /* cr0.lt &= cr1.lt */ + blt memcpy /* if regions overlap */ + + addi r4,r4,-4 + addi r6,r3,-4 + neg r0,r3 + andi. 
r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + beq 58f + + cmplw 0,r5,r0 /* is this more than total to do? */ + blt 63f /* if not much to do */ + andi. r8,r0,3 /* get it word-aligned first */ + subf r5,r0,r5 + mtctr r8 + beq+ 61f +70: lbz r9,4(r4) /* do some bytes */ + stb r9,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 70b +61: srwi. r0,r0,2 + mtctr r0 + beq 58f +72: lwzu r9,4(r4) /* do some words */ + stwu r9,4(r6) + bdnz 72b + +58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ + clrlwi r5,r5,32-LG_CACHELINE_BYTES + li r11,4 + mtctr r0 + beq 63f +53: +#if !defined(CONFIG_8xx) + dcbz r11,r6 +#endif + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 32 + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 64 + COPY_16_BYTES + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 128 + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES +#endif +#endif +#endif + bdnz 53b + +63: srwi. r0,r5,2 + mtctr r0 + beq 64f +30: lwzu r0,4(r4) + stwu r0,4(r6) + bdnz 30b + +64: andi. r0,r5,3 + mtctr r0 + beq+ 65f +40: lbz r0,4(r4) + stb r0,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 40b +65: blr + +_GLOBAL(memmove) + cmplw 0,r3,r4 + bgt backwards_memcpy + /* fall through */ + +_GLOBAL(memcpy) + srwi. r7,r5,3 + addi r6,r3,-4 + addi r4,r4,-4 + beq 2f /* if less than 8 bytes to do */ + andi. r0,r6,3 /* get dest word aligned */ + mtctr r7 + bne 5f +1: lwz r7,4(r4) + lwzu r8,8(r4) + stw r7,4(r6) + stwu r8,8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,4(r4) + addi r5,r5,-4 + stwu r0,4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r4,r4,3 + addi r6,r6,3 +4: lbzu r0,1(r4) + stbu r0,1(r6) + bdnz 4b + blr +5: subfic r0,r0,4 + mtctr r0 +6: lbz r7,4(r4) + addi r4,r4,1 + stb r7,4(r6) + addi r6,r6,1 + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + +_GLOBAL(backwards_memcpy) + rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + add r6,r3,r5 + add r4,r4,r5 + beq 2f + andi. r0,r6,3 + mtctr r7 + bne 5f +1: lwz r7,-4(r4) + lwzu r8,-8(r4) + stw r7,-4(r6) + stwu r8,-8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,-4(r4) + subi r5,r5,4 + stwu r0,-4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 +4: lbzu r0,-1(r4) + stbu r0,-1(r6) + bdnz 4b + blr +5: mtctr r0 +6: lbzu r7,-1(r4) + stbu r7,-1(r6) + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + +_GLOBAL(__copy_tofrom_user) + addi r4,r4,-4 + addi r6,r3,-4 + neg r0,r3 + andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + beq 58f + + cmplw 0,r5,r0 /* is this more than total to do? */ + blt 63f /* if not much to do */ + andi. r8,r0,3 /* get it word-aligned first */ + mtctr r8 + beq+ 61f +70: lbz r9,4(r4) /* do some bytes */ +71: stb r9,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 70b +61: subf r5,r0,r5 + srwi. r0,r0,2 + mtctr r0 + beq 58f +72: lwzu r9,4(r4) /* do some words */ +73: stwu r9,4(r6) + bdnz 72b + + .section __ex_table,"a" + .align 2 + .long 70b,100f + .long 71b,101f + .long 72b,102f + .long 73b,103f + .text + +58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ + clrlwi r5,r5,32-LG_CACHELINE_BYTES + li r11,4 + beq 63f + +#ifdef CONFIG_8xx + /* Don't use prefetch on 8xx */ + mtctr r0 + li r0,0 +53: COPY_16_BYTES_WITHEX(0) + bdnz 53b + +#else /* not CONFIG_8xx */ + /* Here we decide how far ahead to prefetch the source */ + li r3,4 + cmpwi r0,1 + li r7,0 + ble 114f + li r7,1 +#if MAX_COPY_PREFETCH > 1 + /* Heuristically, for large transfers we prefetch + MAX_COPY_PREFETCH cachelines ahead. For small transfers + we prefetch 1 cacheline ahead. 
*/ + cmpwi r0,MAX_COPY_PREFETCH + ble 112f + li r7,MAX_COPY_PREFETCH +112: mtctr r7 +111: dcbt r3,r4 + addi r3,r3,CACHELINE_BYTES + bdnz 111b +#else + dcbt r3,r4 + addi r3,r3,CACHELINE_BYTES +#endif /* MAX_COPY_PREFETCH > 1 */ + +114: subf r8,r7,r0 + mr r0,r7 + mtctr r8 + +53: dcbt r3,r4 +54: dcbz r11,r6 + .section __ex_table,"a" + .align 2 + .long 54b,105f + .text +/* the main body of the cacheline loop */ + COPY_16_BYTES_WITHEX(0) +#if L1_CACHE_LINE_SIZE >= 32 + COPY_16_BYTES_WITHEX(1) +#if L1_CACHE_LINE_SIZE >= 64 + COPY_16_BYTES_WITHEX(2) + COPY_16_BYTES_WITHEX(3) +#if L1_CACHE_LINE_SIZE >= 128 + COPY_16_BYTES_WITHEX(4) + COPY_16_BYTES_WITHEX(5) + COPY_16_BYTES_WITHEX(6) + COPY_16_BYTES_WITHEX(7) +#endif +#endif +#endif + bdnz 53b + cmpwi r0,0 + li r3,4 + li r7,0 + bne 114b +#endif /* CONFIG_8xx */ + +63: srwi. r0,r5,2 + mtctr r0 + beq 64f +30: lwzu r0,4(r4) +31: stwu r0,4(r6) + bdnz 30b + +64: andi. r0,r5,3 + mtctr r0 + beq+ 65f +40: lbz r0,4(r4) +41: stb r0,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 40b +65: li r3,0 + blr + +/* read fault, initial single-byte copy */ +100: li r9,0 + b 90f +/* write fault, initial single-byte copy */ +101: li r9,1 +90: subf r5,r8,r5 + li r3,0 + b 99f +/* read fault, initial word copy */ +102: li r9,0 + b 91f +/* write fault, initial word copy */ +103: li r9,1 +91: li r3,2 + b 99f + +/* + * this stuff handles faults in the cacheline loop and branches to either + * 104f (if in read part) or 105f (if in write part), after updating r5 + */ + COPY_16_BYTES_EXCODE(0) +#if L1_CACHE_LINE_SIZE >= 32 + COPY_16_BYTES_EXCODE(1) +#if L1_CACHE_LINE_SIZE >= 64 + COPY_16_BYTES_EXCODE(2) + COPY_16_BYTES_EXCODE(3) +#if L1_CACHE_LINE_SIZE >= 128 + COPY_16_BYTES_EXCODE(4) + COPY_16_BYTES_EXCODE(5) + COPY_16_BYTES_EXCODE(6) + COPY_16_BYTES_EXCODE(7) +#endif +#endif +#endif + +/* read fault in cacheline loop */ +104: li r9,0 + b 92f +/* fault on dcbz (effectively a write fault) */ +/* or write fault in cacheline loop */ +105: li r9,1 +92: li r3,LG_CACHELINE_BYTES + mfctr r8 + add r0,r0,r8 + b 106f +/* read fault in final word loop */ +108: li r9,0 + b 93f +/* write fault in final word loop */ +109: li r9,1 +93: andi. r5,r5,3 + li r3,2 + b 99f +/* read fault in final byte loop */ +110: li r9,0 + b 94f +/* write fault in final byte loop */ +111: li r9,1 +94: li r5,0 + li r3,0 +/* + * At this stage the number of bytes not copied is + * r5 + (ctr << r3), and r9 is 0 for read or 1 for write. + */ +99: mfctr r0 +106: slw r3,r0,r3 + add. r3,r3,r5 + beq 120f /* shouldn't happen */ + cmpwi 0,r9,0 + bne 120f +/* for a read fault, first try to continue the copy one byte at a time */ + mtctr r3 +130: lbz r0,4(r4) +131: stb r0,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 130b +/* then clear out the destination: r3 bytes starting at 4(r6) */ +132: mfctr r3 + srwi. r0,r3,2 + li r9,0 + mtctr r0 + beq 113f +112: stwu r9,4(r6) + bdnz 112b +113: andi. r0,r3,3 + mtctr r0 + beq 120f +114: stb r9,4(r6) + addi r6,r6,1 + bdnz 114b +120: blr + + .section __ex_table,"a" + .align 2 + .long 30b,108b + .long 31b,109b + .long 40b,110b + .long 41b,111b + .long 130b,132b + .long 131b,120b + .long 112b,120b + .long 114b,120b + .text diff --git a/arch/powerpc/lib/copypage.S b/arch/powerpc/lib/copypage.S new file mode 100644 index 0000000..733d616 --- /dev/null +++ b/arch/powerpc/lib/copypage.S @@ -0,0 +1,121 @@ +/* + * arch/ppc64/lib/copypage.S + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include + +_GLOBAL(copy_page) + std r31,-8(1) + std r30,-16(1) + std r29,-24(1) + std r28,-32(1) + std r27,-40(1) + std r26,-48(1) + std r25,-56(1) + std r24,-64(1) + std r23,-72(1) + std r22,-80(1) + std r21,-88(1) + std r20,-96(1) + li r5,4096/32 - 1 + addi r3,r3,-8 + li r12,5 +0: addi r5,r5,-24 + mtctr r12 + ld r22,640(4) + ld r21,512(4) + ld r20,384(4) + ld r11,256(4) + ld r9,128(4) + ld r7,0(4) + ld r25,648(4) + ld r24,520(4) + ld r23,392(4) + ld r10,264(4) + ld r8,136(4) + ldu r6,8(4) + cmpwi r5,24 +1: std r22,648(3) + std r21,520(3) + std r20,392(3) + std r11,264(3) + std r9,136(3) + std r7,8(3) + ld r28,648(4) + ld r27,520(4) + ld r26,392(4) + ld r31,264(4) + ld r30,136(4) + ld r29,8(4) + std r25,656(3) + std r24,528(3) + std r23,400(3) + std r10,272(3) + std r8,144(3) + std r6,16(3) + ld r22,656(4) + ld r21,528(4) + ld r20,400(4) + ld r11,272(4) + ld r9,144(4) + ld r7,16(4) + std r28,664(3) + std r27,536(3) + std r26,408(3) + std r31,280(3) + std r30,152(3) + stdu r29,24(3) + ld r25,664(4) + ld r24,536(4) + ld r23,408(4) + ld r10,280(4) + ld r8,152(4) + ldu r6,24(4) + bdnz 1b + std r22,648(3) + std r21,520(3) + std r20,392(3) + std r11,264(3) + std r9,136(3) + std r7,8(3) + addi r4,r4,640 + addi r3,r3,648 + bge 0b + mtctr r5 + ld r7,0(4) + ld r8,8(4) + ldu r9,16(4) +3: ld r10,8(4) + std r7,8(3) + ld r7,16(4) + std r8,16(3) + ld r8,24(4) + std r9,24(3) + ldu r9,32(4) + stdu r10,32(3) + bdnz 3b +4: ld r10,8(4) + std r7,8(3) + std r8,16(3) + std r9,24(3) + std r10,32(3) +9: ld r20,-96(1) + ld r21,-88(1) + ld r22,-80(1) + ld r23,-72(1) + ld r24,-64(1) + ld r25,-56(1) + ld r26,-48(1) + ld r27,-40(1) + ld r28,-32(1) + ld r29,-24(1) + ld r30,-16(1) + ld r31,-8(1) + blr diff --git a/arch/powerpc/lib/copyuser.S b/arch/powerpc/lib/copyuser.S new file mode 100644 index 0000000..a0b3fbb --- /dev/null +++ b/arch/powerpc/lib/copyuser.S @@ -0,0 +1,576 @@ +/* + * arch/ppc64/lib/copyuser.S + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include + + .align 7 +_GLOBAL(__copy_tofrom_user) + /* first check for a whole page copy on a page boundary */ + cmpldi cr1,r5,16 + cmpdi cr6,r5,4096 + or r0,r3,r4 + neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */ + andi. r0,r0,4095 + std r3,-24(r1) + crand cr0*4+2,cr0*4+2,cr6*4+2 + std r4,-16(r1) + std r5,-8(r1) + dcbt 0,r4 + beq .Lcopy_page + andi. r6,r6,7 + mtcrf 0x01,r5 + blt cr1,.Lshort_copy + bne .Ldst_unaligned +.Ldst_aligned: + andi. r0,r4,7 + addi r3,r3,-16 + bne .Lsrc_unaligned + srdi r7,r5,4 +20: ld r9,0(r4) + addi r4,r4,-8 + mtctr r7 + andi. 
r5,r5,7 + bf cr7*4+0,22f + addi r3,r3,8 + addi r4,r4,8 + mr r8,r9 + blt cr1,72f +21: ld r9,8(r4) +70: std r8,8(r3) +22: ldu r8,16(r4) +71: stdu r9,16(r3) + bdnz 21b +72: std r8,8(r3) + beq+ 3f + addi r3,r3,16 +23: ld r9,8(r4) +.Ldo_tail: + bf cr7*4+1,1f + rotldi r9,r9,32 +73: stw r9,0(r3) + addi r3,r3,4 +1: bf cr7*4+2,2f + rotldi r9,r9,16 +74: sth r9,0(r3) + addi r3,r3,2 +2: bf cr7*4+3,3f + rotldi r9,r9,8 +75: stb r9,0(r3) +3: li r3,0 + blr + +.Lsrc_unaligned: + srdi r6,r5,3 + addi r5,r5,-16 + subf r4,r0,r4 + srdi r7,r5,4 + sldi r10,r0,3 + cmpldi cr6,r6,3 + andi. r5,r5,7 + mtctr r7 + subfic r11,r10,64 + add r5,r5,r0 + bt cr7*4+0,28f + +24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */ +25: ld r0,8(r4) + sld r6,r9,r10 +26: ldu r9,16(r4) + srd r7,r0,r11 + sld r8,r0,r10 + or r7,r7,r6 + blt cr6,79f +27: ld r0,8(r4) + b 2f + +28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */ +29: ldu r9,8(r4) + sld r8,r0,r10 + addi r3,r3,-8 + blt cr6,5f +30: ld r0,8(r4) + srd r12,r9,r11 + sld r6,r9,r10 +31: ldu r9,16(r4) + or r12,r8,r12 + srd r7,r0,r11 + sld r8,r0,r10 + addi r3,r3,16 + beq cr6,78f + +1: or r7,r7,r6 +32: ld r0,8(r4) +76: std r12,8(r3) +2: srd r12,r9,r11 + sld r6,r9,r10 +33: ldu r9,16(r4) + or r12,r8,r12 +77: stdu r7,16(r3) + srd r7,r0,r11 + sld r8,r0,r10 + bdnz 1b + +78: std r12,8(r3) + or r7,r7,r6 +79: std r7,16(r3) +5: srd r12,r9,r11 + or r12,r8,r12 +80: std r12,24(r3) + bne 6f + li r3,0 + blr +6: cmpwi cr1,r5,8 + addi r3,r3,32 + sld r9,r9,r10 + ble cr1,.Ldo_tail +34: ld r0,8(r4) + srd r7,r0,r11 + or r9,r7,r9 + b .Ldo_tail + +.Ldst_unaligned: + mtcrf 0x01,r6 /* put #bytes to 8B bdry into cr7 */ + subf r5,r6,r5 + li r7,0 + cmpldi r1,r5,16 + bf cr7*4+3,1f +35: lbz r0,0(r4) +81: stb r0,0(r3) + addi r7,r7,1 +1: bf cr7*4+2,2f +36: lhzx r0,r7,r4 +82: sthx r0,r7,r3 + addi r7,r7,2 +2: bf cr7*4+1,3f +37: lwzx r0,r7,r4 +83: stwx r0,r7,r3 +3: mtcrf 0x01,r5 + add r4,r6,r4 + add r3,r6,r3 + b .Ldst_aligned + +.Lshort_copy: + bf cr7*4+0,1f +38: lwz r0,0(r4) +39: lwz r9,4(r4) + addi r4,r4,8 +84: stw r0,0(r3) +85: stw r9,4(r3) + addi r3,r3,8 +1: bf cr7*4+1,2f +40: lwz r0,0(r4) + addi r4,r4,4 +86: stw r0,0(r3) + addi r3,r3,4 +2: bf cr7*4+2,3f +41: lhz r0,0(r4) + addi r4,r4,2 +87: sth r0,0(r3) + addi r3,r3,2 +3: bf cr7*4+3,4f +42: lbz r0,0(r4) +88: stb r0,0(r3) +4: li r3,0 + blr + +/* + * exception handlers follow + * we have to return the number of bytes not copied + * for an exception on a load, we set the rest of the destination to 0 + */ + +136: +137: + add r3,r3,r7 + b 1f +130: +131: + addi r3,r3,8 +120: +122: +124: +125: +126: +127: +128: +129: +133: + addi r3,r3,8 +121: +132: + addi r3,r3,8 +123: +134: +135: +138: +139: +140: +141: +142: + +/* + * here we have had a fault on a load and r3 points to the first + * unmodified byte of the destination + */ +1: ld r6,-24(r1) + ld r4,-16(r1) + ld r5,-8(r1) + subf r6,r6,r3 + add r4,r4,r6 + subf r5,r6,r5 /* #bytes left to go */ + +/* + * first see if we can copy any more bytes before hitting another exception + */ + mtctr r5 +43: lbz r0,0(r4) + addi r4,r4,1 +89: stb r0,0(r3) + addi r3,r3,1 + bdnz 43b + li r3,0 /* huh? all copied successfully this time? */ + blr + +/* + * here we have trapped again, need to clear ctr bytes starting at r3 + */ +143: mfctr r5 + li r0,0 + mr r4,r3 + mr r3,r5 /* return the number of bytes not copied */ +1: andi. r9,r4,7 + beq 3f +90: stb r0,0(r4) + addic. r5,r5,-1 + addi r4,r4,1 + bne 1b + blr +3: cmpldi cr1,r5,8 + srdi r9,r5,3 + andi. 
r5,r5,7 + blt cr1,93f + mtctr r9 +91: std r0,0(r4) + addi r4,r4,8 + bdnz 91b +93: beqlr + mtctr r5 +92: stb r0,0(r4) + addi r4,r4,1 + bdnz 92b + blr + +/* + * exception handlers for stores: we just need to work + * out how many bytes weren't copied + */ +182: +183: + add r3,r3,r7 + b 1f +180: + addi r3,r3,8 +171: +177: + addi r3,r3,8 +170: +172: +176: +178: + addi r3,r3,4 +185: + addi r3,r3,4 +173: +174: +175: +179: +181: +184: +186: +187: +188: +189: +1: + ld r6,-24(r1) + ld r5,-8(r1) + add r6,r6,r5 + subf r3,r3,r6 /* #bytes not copied */ +190: +191: +192: + blr /* #bytes not copied in r3 */ + + .section __ex_table,"a" + .align 3 + .llong 20b,120b + .llong 21b,121b + .llong 70b,170b + .llong 22b,122b + .llong 71b,171b + .llong 72b,172b + .llong 23b,123b + .llong 73b,173b + .llong 74b,174b + .llong 75b,175b + .llong 24b,124b + .llong 25b,125b + .llong 26b,126b + .llong 27b,127b + .llong 28b,128b + .llong 29b,129b + .llong 30b,130b + .llong 31b,131b + .llong 32b,132b + .llong 76b,176b + .llong 33b,133b + .llong 77b,177b + .llong 78b,178b + .llong 79b,179b + .llong 80b,180b + .llong 34b,134b + .llong 35b,135b + .llong 81b,181b + .llong 36b,136b + .llong 82b,182b + .llong 37b,137b + .llong 83b,183b + .llong 38b,138b + .llong 39b,139b + .llong 84b,184b + .llong 85b,185b + .llong 40b,140b + .llong 86b,186b + .llong 41b,141b + .llong 87b,187b + .llong 42b,142b + .llong 88b,188b + .llong 43b,143b + .llong 89b,189b + .llong 90b,190b + .llong 91b,191b + .llong 92b,192b + + .text + +/* + * Routine to copy a whole page of data, optimized for POWER4. + * On POWER4 it is more than 50% faster than the simple loop + * above (following the .Ldst_aligned label) but it runs slightly + * slower on POWER3. + */ +.Lcopy_page: + std r31,-32(1) + std r30,-40(1) + std r29,-48(1) + std r28,-56(1) + std r27,-64(1) + std r26,-72(1) + std r25,-80(1) + std r24,-88(1) + std r23,-96(1) + std r22,-104(1) + std r21,-112(1) + std r20,-120(1) + li r5,4096/32 - 1 + addi r3,r3,-8 + li r0,5 +0: addi r5,r5,-24 + mtctr r0 +20: ld r22,640(4) +21: ld r21,512(4) +22: ld r20,384(4) +23: ld r11,256(4) +24: ld r9,128(4) +25: ld r7,0(4) +26: ld r25,648(4) +27: ld r24,520(4) +28: ld r23,392(4) +29: ld r10,264(4) +30: ld r8,136(4) +31: ldu r6,8(4) + cmpwi r5,24 +1: +32: std r22,648(3) +33: std r21,520(3) +34: std r20,392(3) +35: std r11,264(3) +36: std r9,136(3) +37: std r7,8(3) +38: ld r28,648(4) +39: ld r27,520(4) +40: ld r26,392(4) +41: ld r31,264(4) +42: ld r30,136(4) +43: ld r29,8(4) +44: std r25,656(3) +45: std r24,528(3) +46: std r23,400(3) +47: std r10,272(3) +48: std r8,144(3) +49: std r6,16(3) +50: ld r22,656(4) +51: ld r21,528(4) +52: ld r20,400(4) +53: ld r11,272(4) +54: ld r9,144(4) +55: ld r7,16(4) +56: std r28,664(3) +57: std r27,536(3) +58: std r26,408(3) +59: std r31,280(3) +60: std r30,152(3) +61: stdu r29,24(3) +62: ld r25,664(4) +63: ld r24,536(4) +64: ld r23,408(4) +65: ld r10,280(4) +66: ld r8,152(4) +67: ldu r6,24(4) + bdnz 1b +68: std r22,648(3) +69: std r21,520(3) +70: std r20,392(3) +71: std r11,264(3) +72: std r9,136(3) +73: std r7,8(3) +74: addi r4,r4,640 +75: addi r3,r3,648 + bge 0b + mtctr r5 +76: ld r7,0(4) +77: ld r8,8(4) +78: ldu r9,16(4) +3: +79: ld r10,8(4) +80: std r7,8(3) +81: ld r7,16(4) +82: std r8,16(3) +83: ld r8,24(4) +84: std r9,24(3) +85: ldu r9,32(4) +86: stdu r10,32(3) + bdnz 3b +4: +87: ld r10,8(4) +88: std r7,8(3) +89: std r8,16(3) +90: std r9,24(3) +91: std r10,32(3) +9: ld r20,-120(1) + ld r21,-112(1) + ld r22,-104(1) + ld r23,-96(1) + ld r24,-88(1) + ld r25,-80(1) + ld r26,-72(1) + ld 
r27,-64(1) + ld r28,-56(1) + ld r29,-48(1) + ld r30,-40(1) + ld r31,-32(1) + li r3,0 + blr + +/* + * on an exception, reset to the beginning and jump back into the + * standard __copy_tofrom_user + */ +100: ld r20,-120(1) + ld r21,-112(1) + ld r22,-104(1) + ld r23,-96(1) + ld r24,-88(1) + ld r25,-80(1) + ld r26,-72(1) + ld r27,-64(1) + ld r28,-56(1) + ld r29,-48(1) + ld r30,-40(1) + ld r31,-32(1) + ld r3,-24(r1) + ld r4,-16(r1) + li r5,4096 + b .Ldst_aligned + + .section __ex_table,"a" + .align 3 + .llong 20b,100b + .llong 21b,100b + .llong 22b,100b + .llong 23b,100b + .llong 24b,100b + .llong 25b,100b + .llong 26b,100b + .llong 27b,100b + .llong 28b,100b + .llong 29b,100b + .llong 30b,100b + .llong 31b,100b + .llong 32b,100b + .llong 33b,100b + .llong 34b,100b + .llong 35b,100b + .llong 36b,100b + .llong 37b,100b + .llong 38b,100b + .llong 39b,100b + .llong 40b,100b + .llong 41b,100b + .llong 42b,100b + .llong 43b,100b + .llong 44b,100b + .llong 45b,100b + .llong 46b,100b + .llong 47b,100b + .llong 48b,100b + .llong 49b,100b + .llong 50b,100b + .llong 51b,100b + .llong 52b,100b + .llong 53b,100b + .llong 54b,100b + .llong 55b,100b + .llong 56b,100b + .llong 57b,100b + .llong 58b,100b + .llong 59b,100b + .llong 60b,100b + .llong 61b,100b + .llong 62b,100b + .llong 63b,100b + .llong 64b,100b + .llong 65b,100b + .llong 66b,100b + .llong 67b,100b + .llong 68b,100b + .llong 69b,100b + .llong 70b,100b + .llong 71b,100b + .llong 72b,100b + .llong 73b,100b + .llong 74b,100b + .llong 75b,100b + .llong 76b,100b + .llong 77b,100b + .llong 78b,100b + .llong 79b,100b + .llong 80b,100b + .llong 81b,100b + .llong 82b,100b + .llong 83b,100b + .llong 84b,100b + .llong 85b,100b + .llong 86b,100b + .llong 87b,100b + .llong 88b,100b + .llong 89b,100b + .llong 90b,100b + .llong 91b,100b diff --git a/arch/powerpc/lib/div64.S b/arch/powerpc/lib/div64.S new file mode 100644 index 0000000..3527569 --- /dev/null +++ b/arch/powerpc/lib/div64.S @@ -0,0 +1,58 @@ +/* + * Divide a 64-bit unsigned number by a 32-bit unsigned number. + * This routine assumes that the top 32 bits of the dividend are + * non-zero to start with. + * On entry, r3 points to the dividend, which get overwritten with + * the 64-bit quotient, and r4 contains the divisor. + * On exit, r3 contains the remainder. + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include + +_GLOBAL(__div64_32) + lwz r5,0(r3) # get the dividend into r5/r6 + lwz r6,4(r3) + cmplw r5,r4 + li r7,0 + li r8,0 + blt 1f + divwu r7,r5,r4 # if dividend.hi >= divisor, + mullw r0,r7,r4 # quotient.hi = dividend.hi / divisor + subf. r5,r0,r5 # dividend.hi %= divisor + beq 3f +1: mr r11,r5 # here dividend.hi != 0 + andis. 
r0,r5,0xc000 + bne 2f + cntlzw r0,r5 # we are shifting the dividend right + li r10,-1 # to make it < 2^32, and shifting + srw r10,r10,r0 # the divisor right the same amount, + add r9,r4,r10 # rounding up (so the estimate cannot + andc r11,r6,r10 # ever be too large, only too small) + andc r9,r9,r10 + or r11,r5,r11 + rotlw r9,r9,r0 + rotlw r11,r11,r0 + divwu r11,r11,r9 # then we divide the shifted quantities +2: mullw r10,r11,r4 # to get an estimate of the quotient, + mulhwu r9,r11,r4 # multiply the estimate by the divisor, + subfc r6,r10,r6 # take the product from the divisor, + add r8,r8,r11 # and add the estimate to the accumulated + subfe. r5,r9,r5 # quotient + bne 1b +3: cmplw r6,r4 + blt 4f + divwu r0,r6,r4 # perform the remaining 32-bit division + mullw r10,r0,r4 # and get the remainder + add r8,r8,r0 + subf r6,r10,r6 +4: stw r7,0(r3) # return the quotient in *r3 + stw r8,4(r3) + mr r3,r6 # return the remainder in r3 + blr diff --git a/arch/powerpc/lib/e2a.c b/arch/powerpc/lib/e2a.c new file mode 100644 index 0000000..d2b8348 --- /dev/null +++ b/arch/powerpc/lib/e2a.c @@ -0,0 +1,108 @@ +/* + * arch/ppc64/lib/e2a.c + * + * EBCDIC to ASCII conversion + * + * This function moved here from arch/ppc64/kernel/viopath.c + * + * (C) Copyright 2000-2004 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) anyu later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include + +unsigned char e2a(unsigned char x) +{ + switch (x) { + case 0xF0: + return '0'; + case 0xF1: + return '1'; + case 0xF2: + return '2'; + case 0xF3: + return '3'; + case 0xF4: + return '4'; + case 0xF5: + return '5'; + case 0xF6: + return '6'; + case 0xF7: + return '7'; + case 0xF8: + return '8'; + case 0xF9: + return '9'; + case 0xC1: + return 'A'; + case 0xC2: + return 'B'; + case 0xC3: + return 'C'; + case 0xC4: + return 'D'; + case 0xC5: + return 'E'; + case 0xC6: + return 'F'; + case 0xC7: + return 'G'; + case 0xC8: + return 'H'; + case 0xC9: + return 'I'; + case 0xD1: + return 'J'; + case 0xD2: + return 'K'; + case 0xD3: + return 'L'; + case 0xD4: + return 'M'; + case 0xD5: + return 'N'; + case 0xD6: + return 'O'; + case 0xD7: + return 'P'; + case 0xD8: + return 'Q'; + case 0xD9: + return 'R'; + case 0xE2: + return 'S'; + case 0xE3: + return 'T'; + case 0xE4: + return 'U'; + case 0xE5: + return 'V'; + case 0xE6: + return 'W'; + case 0xE7: + return 'X'; + case 0xE8: + return 'Y'; + case 0xE9: + return 'Z'; + } + return ' '; +} +EXPORT_SYMBOL(e2a); + + diff --git a/arch/powerpc/lib/memcpy.S b/arch/powerpc/lib/memcpy.S new file mode 100644 index 0000000..9ccacdf --- /dev/null +++ b/arch/powerpc/lib/memcpy.S @@ -0,0 +1,172 @@ +/* + * arch/ppc64/lib/memcpy.S + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include + + .align 7 +_GLOBAL(memcpy) + mtcrf 0x01,r5 + cmpldi cr1,r5,16 + neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry + andi. r6,r6,7 + dcbt 0,r4 + blt cr1,.Lshort_copy + bne .Ldst_unaligned +.Ldst_aligned: + andi. r0,r4,7 + addi r3,r3,-16 + bne .Lsrc_unaligned + srdi r7,r5,4 + ld r9,0(r4) + addi r4,r4,-8 + mtctr r7 + andi. r5,r5,7 + bf cr7*4+0,2f + addi r3,r3,8 + addi r4,r4,8 + mr r8,r9 + blt cr1,3f +1: ld r9,8(r4) + std r8,8(r3) +2: ldu r8,16(r4) + stdu r9,16(r3) + bdnz 1b +3: std r8,8(r3) + beqlr + addi r3,r3,16 + ld r9,8(r4) +.Ldo_tail: + bf cr7*4+1,1f + rotldi r9,r9,32 + stw r9,0(r3) + addi r3,r3,4 +1: bf cr7*4+2,2f + rotldi r9,r9,16 + sth r9,0(r3) + addi r3,r3,2 +2: bf cr7*4+3,3f + rotldi r9,r9,8 + stb r9,0(r3) +3: blr + +.Lsrc_unaligned: + srdi r6,r5,3 + addi r5,r5,-16 + subf r4,r0,r4 + srdi r7,r5,4 + sldi r10,r0,3 + cmpdi cr6,r6,3 + andi. r5,r5,7 + mtctr r7 + subfic r11,r10,64 + add r5,r5,r0 + + bt cr7*4+0,0f + + ld r9,0(r4) # 3+2n loads, 2+2n stores + ld r0,8(r4) + sld r6,r9,r10 + ldu r9,16(r4) + srd r7,r0,r11 + sld r8,r0,r10 + or r7,r7,r6 + blt cr6,4f + ld r0,8(r4) + # s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12 + b 2f + +0: ld r0,0(r4) # 4+2n loads, 3+2n stores + ldu r9,8(r4) + sld r8,r0,r10 + addi r3,r3,-8 + blt cr6,5f + ld r0,8(r4) + srd r12,r9,r11 + sld r6,r9,r10 + ldu r9,16(r4) + or r12,r8,r12 + srd r7,r0,r11 + sld r8,r0,r10 + addi r3,r3,16 + beq cr6,3f + + # d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9 +1: or r7,r7,r6 + ld r0,8(r4) + std r12,8(r3) +2: srd r12,r9,r11 + sld r6,r9,r10 + ldu r9,16(r4) + or r12,r8,r12 + stdu r7,16(r3) + srd r7,r0,r11 + sld r8,r0,r10 + bdnz 1b + +3: std r12,8(r3) + or r7,r7,r6 +4: std r7,16(r3) +5: srd r12,r9,r11 + or r12,r8,r12 + std r12,24(r3) + beqlr + cmpwi cr1,r5,8 + addi r3,r3,32 + sld r9,r9,r10 + ble cr1,.Ldo_tail + ld r0,8(r4) + srd r7,r0,r11 + or r9,r7,r9 + b .Ldo_tail + +.Ldst_unaligned: + mtcrf 0x01,r6 # put #bytes to 8B bdry into cr7 + subf r5,r6,r5 + li r7,0 + cmpldi r1,r5,16 + bf cr7*4+3,1f + lbz r0,0(r4) + stb r0,0(r3) + addi r7,r7,1 +1: bf cr7*4+2,2f + lhzx r0,r7,r4 + sthx r0,r7,r3 + addi r7,r7,2 +2: bf cr7*4+1,3f + lwzx r0,r7,r4 + stwx r0,r7,r3 +3: mtcrf 0x01,r5 + add r4,r6,r4 + add r3,r6,r3 + b .Ldst_aligned + +.Lshort_copy: + bf cr7*4+0,1f + lwz r0,0(r4) + lwz r9,4(r4) + addi r4,r4,8 + stw r0,0(r3) + stw r9,4(r3) + addi r3,r3,8 +1: bf cr7*4+1,2f + lwz r0,0(r4) + addi r4,r4,4 + stw r0,0(r3) + addi r3,r3,4 +2: bf cr7*4+2,3f + lhz r0,0(r4) + addi r4,r4,2 + sth r0,0(r3) + addi r3,r3,2 +3: bf cr7*4+3,4f + lbz r0,0(r4) + stb r0,0(r3) +4: blr diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c new file mode 100644 index 0000000..42c5de2 --- /dev/null +++ b/arch/powerpc/lib/rheap.c @@ -0,0 +1,693 @@ +/* + * arch/ppc/syslib/rheap.c + * + * A Remote Heap. Remote means that we don't touch the memory that the + * heap points to. Normal heap implementations use the memory they manage + * to place their list. We cannot do that because the memory we manage may + * have special properties, for example it is uncachable or of different + * endianess. + * + * Author: Pantelis Antoniou + * + * 2004 (c) INTRACOM S.A. Greece. 
This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ +#include +#include +#include +#include + +#include + +/* + * Fixup a list_head, needed when copying lists. If the pointers fall + * between s and e, apply the delta. This assumes that + * sizeof(struct list_head *) == sizeof(unsigned long *). + */ +static inline void fixup(unsigned long s, unsigned long e, int d, + struct list_head *l) +{ + unsigned long *pp; + + pp = (unsigned long *)&l->next; + if (*pp >= s && *pp < e) + *pp += d; + + pp = (unsigned long *)&l->prev; + if (*pp >= s && *pp < e) + *pp += d; +} + +/* Grow the allocated blocks */ +static int grow(rh_info_t * info, int max_blocks) +{ + rh_block_t *block, *blk; + int i, new_blocks; + int delta; + unsigned long blks, blke; + + if (max_blocks <= info->max_blocks) + return -EINVAL; + + new_blocks = max_blocks - info->max_blocks; + + block = kmalloc(sizeof(rh_block_t) * max_blocks, GFP_KERNEL); + if (block == NULL) + return -ENOMEM; + + if (info->max_blocks > 0) { + + /* copy old block area */ + memcpy(block, info->block, + sizeof(rh_block_t) * info->max_blocks); + + delta = (char *)block - (char *)info->block; + + /* and fixup list pointers */ + blks = (unsigned long)info->block; + blke = (unsigned long)(info->block + info->max_blocks); + + for (i = 0, blk = block; i < info->max_blocks; i++, blk++) + fixup(blks, blke, delta, &blk->list); + + fixup(blks, blke, delta, &info->empty_list); + fixup(blks, blke, delta, &info->free_list); + fixup(blks, blke, delta, &info->taken_list); + + /* free the old allocated memory */ + if ((info->flags & RHIF_STATIC_BLOCK) == 0) + kfree(info->block); + } + + info->block = block; + info->empty_slots += new_blocks; + info->max_blocks = max_blocks; + info->flags &= ~RHIF_STATIC_BLOCK; + + /* add all new blocks to the free list */ + for (i = 0, blk = block + info->max_blocks; i < new_blocks; i++, blk++) + list_add(&blk->list, &info->empty_list); + + return 0; +} + +/* + * Assure at least the required amount of empty slots. If this function + * causes a grow in the block area then all pointers kept to the block + * area are invalid! + */ +static int assure_empty(rh_info_t * info, int slots) +{ + int max_blocks; + + /* This function is not meant to be used to grow uncontrollably */ + if (slots >= 4) + return -EINVAL; + + /* Enough space */ + if (info->empty_slots >= slots) + return 0; + + /* Next 16 sized block */ + max_blocks = ((info->max_blocks + slots) + 15) & ~15; + + return grow(info, max_blocks); +} + +static rh_block_t *get_slot(rh_info_t * info) +{ + rh_block_t *blk; + + /* If no more free slots, and failure to extend. 
*/ + /* XXX: You should have called assure_empty before */ + if (info->empty_slots == 0) { + printk(KERN_ERR "rh: out of slots; crash is imminent.\n"); + return NULL; + } + + /* Get empty slot to use */ + blk = list_entry(info->empty_list.next, rh_block_t, list); + list_del_init(&blk->list); + info->empty_slots--; + + /* Initialize */ + blk->start = NULL; + blk->size = 0; + blk->owner = NULL; + + return blk; +} + +static inline void release_slot(rh_info_t * info, rh_block_t * blk) +{ + list_add(&blk->list, &info->empty_list); + info->empty_slots++; +} + +static void attach_free_block(rh_info_t * info, rh_block_t * blkn) +{ + rh_block_t *blk; + rh_block_t *before; + rh_block_t *after; + rh_block_t *next; + int size; + unsigned long s, e, bs, be; + struct list_head *l; + + /* We assume that they are aligned properly */ + size = blkn->size; + s = (unsigned long)blkn->start; + e = s + size; + + /* Find the blocks immediately before and after the given one + * (if any) */ + before = NULL; + after = NULL; + next = NULL; + + list_for_each(l, &info->free_list) { + blk = list_entry(l, rh_block_t, list); + + bs = (unsigned long)blk->start; + be = bs + blk->size; + + if (next == NULL && s >= bs) + next = blk; + + if (be == s) + before = blk; + + if (e == bs) + after = blk; + + /* If both are not null, break now */ + if (before != NULL && after != NULL) + break; + } + + /* Now check if they are really adjacent */ + if (before != NULL && s != (unsigned long)before->start + before->size) + before = NULL; + + if (after != NULL && e != (unsigned long)after->start) + after = NULL; + + /* No coalescing; list insert and return */ + if (before == NULL && after == NULL) { + + if (next != NULL) + list_add(&blkn->list, &next->list); + else + list_add(&blkn->list, &info->free_list); + + return; + } + + /* We don't need it anymore */ + release_slot(info, blkn); + + /* Grow the before block */ + if (before != NULL && after == NULL) { + before->size += size; + return; + } + + /* Grow the after block backwards */ + if (before == NULL && after != NULL) { + after->start = (int8_t *)after->start - size; + after->size += size; + return; + } + + /* Grow the before block, and release the after block */ + before->size += size + after->size; + list_del(&after->list); + release_slot(info, after); +} + +static void attach_taken_block(rh_info_t * info, rh_block_t * blkn) +{ + rh_block_t *blk; + struct list_head *l; + + /* Find the block immediately before the given one (if any) */ + list_for_each(l, &info->taken_list) { + blk = list_entry(l, rh_block_t, list); + if (blk->start > blkn->start) { + list_add_tail(&blkn->list, &blk->list); + return; + } + } + + list_add_tail(&blkn->list, &info->taken_list); +} + +/* + * Create a remote heap dynamically. Note that no memory for the blocks + * are allocated. It will upon the first allocation + */ +rh_info_t *rh_create(unsigned int alignment) +{ + rh_info_t *info; + + /* Alignment must be a power of two */ + if ((alignment & (alignment - 1)) != 0) + return ERR_PTR(-EINVAL); + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (info == NULL) + return ERR_PTR(-ENOMEM); + + info->alignment = alignment; + + /* Initially everything as empty */ + info->block = NULL; + info->max_blocks = 0; + info->empty_slots = 0; + info->flags = 0; + + INIT_LIST_HEAD(&info->empty_list); + INIT_LIST_HEAD(&info->free_list); + INIT_LIST_HEAD(&info->taken_list); + + return info; +} + +/* + * Destroy a dynamically created remote heap. 
Deallocate only if the areas + * are not static + */ +void rh_destroy(rh_info_t * info) +{ + if ((info->flags & RHIF_STATIC_BLOCK) == 0 && info->block != NULL) + kfree(info->block); + + if ((info->flags & RHIF_STATIC_INFO) == 0) + kfree(info); +} + +/* + * Initialize in place a remote heap info block. This is needed to support + * operation very early in the startup of the kernel, when it is not yet safe + * to call kmalloc. + */ +void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks, + rh_block_t * block) +{ + int i; + rh_block_t *blk; + + /* Alignment must be a power of two */ + if ((alignment & (alignment - 1)) != 0) + return; + + info->alignment = alignment; + + /* Initially everything as empty */ + info->block = block; + info->max_blocks = max_blocks; + info->empty_slots = max_blocks; + info->flags = RHIF_STATIC_INFO | RHIF_STATIC_BLOCK; + + INIT_LIST_HEAD(&info->empty_list); + INIT_LIST_HEAD(&info->free_list); + INIT_LIST_HEAD(&info->taken_list); + + /* Add all new blocks to the free list */ + for (i = 0, blk = block; i < max_blocks; i++, blk++) + list_add(&blk->list, &info->empty_list); +} + +/* Attach a free memory region, coalesces regions if adjuscent */ +int rh_attach_region(rh_info_t * info, void *start, int size) +{ + rh_block_t *blk; + unsigned long s, e, m; + int r; + + /* The region must be aligned */ + s = (unsigned long)start; + e = s + size; + m = info->alignment - 1; + + /* Round start up */ + s = (s + m) & ~m; + + /* Round end down */ + e = e & ~m; + + /* Take final values */ + start = (void *)s; + size = (int)(e - s); + + /* Grow the blocks, if needed */ + r = assure_empty(info, 1); + if (r < 0) + return r; + + blk = get_slot(info); + blk->start = start; + blk->size = size; + blk->owner = NULL; + + attach_free_block(info, blk); + + return 0; +} + +/* Detatch given address range, splits free block if needed. 
*/ +void *rh_detach_region(rh_info_t * info, void *start, int size) +{ + struct list_head *l; + rh_block_t *blk, *newblk; + unsigned long s, e, m, bs, be; + + /* Validate size */ + if (size <= 0) + return ERR_PTR(-EINVAL); + + /* The region must be aligned */ + s = (unsigned long)start; + e = s + size; + m = info->alignment - 1; + + /* Round start up */ + s = (s + m) & ~m; + + /* Round end down */ + e = e & ~m; + + if (assure_empty(info, 1) < 0) + return ERR_PTR(-ENOMEM); + + blk = NULL; + list_for_each(l, &info->free_list) { + blk = list_entry(l, rh_block_t, list); + /* The range must lie entirely inside one free block */ + bs = (unsigned long)blk->start; + be = (unsigned long)blk->start + blk->size; + if (s >= bs && e <= be) + break; + blk = NULL; + } + + if (blk == NULL) + return ERR_PTR(-ENOMEM); + + /* Perfect fit */ + if (bs == s && be == e) { + /* Delete from free list, release slot */ + list_del(&blk->list); + release_slot(info, blk); + return (void *)s; + } + + /* blk still in free list, with updated start and/or size */ + if (bs == s || be == e) { + if (bs == s) + blk->start = (int8_t *)blk->start + size; + blk->size -= size; + + } else { + /* The front free fragment */ + blk->size = s - bs; + + /* the back free fragment */ + newblk = get_slot(info); + newblk->start = (void *)e; + newblk->size = be - e; + + list_add(&newblk->list, &blk->list); + } + + return (void *)s; +} + +void *rh_alloc(rh_info_t * info, int size, const char *owner) +{ + struct list_head *l; + rh_block_t *blk; + rh_block_t *newblk; + void *start; + + /* Validate size */ + if (size <= 0) + return ERR_PTR(-EINVAL); + + /* Align to configured alignment */ + size = (size + (info->alignment - 1)) & ~(info->alignment - 1); + + if (assure_empty(info, 1) < 0) + return ERR_PTR(-ENOMEM); + + blk = NULL; + list_for_each(l, &info->free_list) { + blk = list_entry(l, rh_block_t, list); + if (size <= blk->size) + break; + blk = NULL; + } + + if (blk == NULL) + return ERR_PTR(-ENOMEM); + + /* Just fits */ + if (blk->size == size) { + /* Move from free list to taken list */ + list_del(&blk->list); + blk->owner = owner; + start = blk->start; + + attach_taken_block(info, blk); + + return start; + } + + newblk = get_slot(info); + newblk->start = blk->start; + newblk->size = size; + newblk->owner = owner; + + /* blk still in free list, with updated start, size */ + blk->start = (int8_t *)blk->start + size; + blk->size -= size; + + start = newblk->start; + + attach_taken_block(info, newblk); + + return start; +} + +/* allocate at precisely the given address */ +void *rh_alloc_fixed(rh_info_t * info, void *start, int size, const char *owner) +{ + struct list_head *l; + rh_block_t *blk, *newblk1, *newblk2; + unsigned long s, e, m, bs, be; + + /* Validate size */ + if (size <= 0) + return ERR_PTR(-EINVAL); + + /* The region must be aligned */ + s = (unsigned long)start; + e = s + size; + m = info->alignment - 1; + + /* Round start up */ + s = (s + m) & ~m; + + /* Round end down */ + e = e & ~m; + + if (assure_empty(info, 2) < 0) + return ERR_PTR(-ENOMEM); + + blk = NULL; + list_for_each(l, &info->free_list) { + blk = list_entry(l, rh_block_t, list); + /* The range must lie entirely inside one free block */ + bs = (unsigned long)blk->start; + be = (unsigned long)blk->start + blk->size; + if (s >= bs && e <= be) + break; + } + + if (blk == NULL) + return ERR_PTR(-ENOMEM); + + /* Perfect fit */ + if (bs == s && be == e) { + /* Move from free list to taken list */ + list_del(&blk->list); + blk->owner = owner; + + start = blk->start; + 
attach_taken_block(info, blk); + + return start; + + } + + /* blk still in free list, with updated start and/or size */ + if (bs == s || be == e) { + if (bs == s) + blk->start = (int8_t *)blk->start + size; + blk->size -= size; + + } else { + /* The front free fragment */ + blk->size = s - bs; + + /* The back free fragment */ + newblk2 = get_slot(info); + newblk2->start = (void *)e; + newblk2->size = be - e; + + list_add(&newblk2->list, &blk->list); + } + + newblk1 = get_slot(info); + newblk1->start = (void *)s; + newblk1->size = e - s; + newblk1->owner = owner; + + start = newblk1->start; + attach_taken_block(info, newblk1); + + return start; +} + +int rh_free(rh_info_t * info, void *start) +{ + rh_block_t *blk, *blk2; + struct list_head *l; + int size; + + /* Linear search for block */ + blk = NULL; + list_for_each(l, &info->taken_list) { + blk2 = list_entry(l, rh_block_t, list); + if (start < blk2->start) + break; + blk = blk2; + } + + if (blk == NULL || start > (blk->start + blk->size)) + return -EINVAL; + + /* Remove from taken list */ + list_del(&blk->list); + + /* Get size of freed block */ + size = blk->size; + attach_free_block(info, blk); + + return size; +} + +int rh_get_stats(rh_info_t * info, int what, int max_stats, rh_stats_t * stats) +{ + rh_block_t *blk; + struct list_head *l; + struct list_head *h; + int nr; + + switch (what) { + + case RHGS_FREE: + h = &info->free_list; + break; + + case RHGS_TAKEN: + h = &info->taken_list; + break; + + default: + return -EINVAL; + } + + /* Linear search for block */ + nr = 0; + list_for_each(l, h) { + blk = list_entry(l, rh_block_t, list); + if (stats != NULL && nr < max_stats) { + stats->start = blk->start; + stats->size = blk->size; + stats->owner = blk->owner; + stats++; + } + nr++; + } + + return nr; +} + +int rh_set_owner(rh_info_t * info, void *start, const char *owner) +{ + rh_block_t *blk, *blk2; + struct list_head *l; + int size; + + /* Linear search for block */ + blk = NULL; + list_for_each(l, &info->taken_list) { + blk2 = list_entry(l, rh_block_t, list); + if (start < blk2->start) + break; + blk = blk2; + } + + if (blk == NULL || start > (blk->start + blk->size)) + return -EINVAL; + + blk->owner = owner; + size = blk->size; + + return size; +} + +void rh_dump(rh_info_t * info) +{ + static rh_stats_t st[32]; /* XXX maximum 32 blocks */ + int maxnr; + int i, nr; + + maxnr = sizeof(st) / sizeof(st[0]); + + printk(KERN_INFO + "info @0x%p (%d slots empty / %d max)\n", + info, info->empty_slots, info->max_blocks); + + printk(KERN_INFO " Free:\n"); + nr = rh_get_stats(info, RHGS_FREE, maxnr, st); + if (nr > maxnr) + nr = maxnr; + for (i = 0; i < nr; i++) + printk(KERN_INFO + " 0x%p-0x%p (%u)\n", + st[i].start, (int8_t *) st[i].start + st[i].size, + st[i].size); + printk(KERN_INFO "\n"); + + printk(KERN_INFO " Taken:\n"); + nr = rh_get_stats(info, RHGS_TAKEN, maxnr, st); + if (nr > maxnr) + nr = maxnr; + for (i = 0; i < nr; i++) + printk(KERN_INFO + " 0x%p-0x%p (%u) %s\n", + st[i].start, (int8_t *) st[i].start + st[i].size, + st[i].size, st[i].owner != NULL ? st[i].owner : ""); + printk(KERN_INFO "\n"); +} + +void rh_dump_blk(rh_info_t * info, rh_block_t * blk) +{ + printk(KERN_INFO + "blk @0x%p: 0x%p-0x%p (%u)\n", + blk, blk->start, (int8_t *) blk->start + blk->size, blk->size); +} diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c new file mode 100644 index 0000000..e79123d --- /dev/null +++ b/arch/powerpc/lib/sstep.c @@ -0,0 +1,141 @@ +/* + * Single-step support. 
+ * + * Copyright (C) 2004 Paul Mackerras , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include + +extern char system_call_common[]; + +/* Bits in SRR1 that are copied from MSR */ +#define MSR_MASK 0xffffffff87c0ffff + +/* + * Determine whether a conditional branch instruction would branch. + */ +static int branch_taken(unsigned int instr, struct pt_regs *regs) +{ + unsigned int bo = (instr >> 21) & 0x1f; + unsigned int bi; + + if ((bo & 4) == 0) { + /* decrement counter */ + --regs->ctr; + if (((bo >> 1) & 1) ^ (regs->ctr == 0)) + return 0; + } + if ((bo & 0x10) == 0) { + /* check bit from CR */ + bi = (instr >> 16) & 0x1f; + if (((regs->ccr >> (31 - bi)) & 1) != ((bo >> 3) & 1)) + return 0; + } + return 1; +} + +/* + * Emulate instructions that cause a transfer of control. + * Returns 1 if the step was emulated, 0 if not, + * or -1 if the instruction is one that should not be stepped, + * such as an rfid, or a mtmsrd that would clear MSR_RI. + */ +int emulate_step(struct pt_regs *regs, unsigned int instr) +{ + unsigned int opcode, rd; + unsigned long int imm; + + opcode = instr >> 26; + switch (opcode) { + case 16: /* bc */ + imm = (signed short)(instr & 0xfffc); + if ((instr & 2) == 0) + imm += regs->nip; + regs->nip += 4; + if ((regs->msr & MSR_SF) == 0) + regs->nip &= 0xffffffffUL; + if (instr & 1) + regs->link = regs->nip; + if (branch_taken(instr, regs)) + regs->nip = imm; + return 1; + case 17: /* sc */ + /* + * N.B. this uses knowledge about how the syscall + * entry code works. If that is changed, this will + * need to be changed also. + */ + regs->gpr[9] = regs->gpr[13]; + regs->gpr[11] = regs->nip + 4; + regs->gpr[12] = regs->msr & MSR_MASK; + regs->gpr[13] = (unsigned long) get_paca(); + regs->nip = (unsigned long) &system_call_common; + regs->msr = MSR_KERNEL; + return 1; + case 18: /* b */ + imm = instr & 0x03fffffc; + if (imm & 0x02000000) + imm -= 0x04000000; + if ((instr & 2) == 0) + imm += regs->nip; + if (instr & 1) { + regs->link = regs->nip + 4; + if ((regs->msr & MSR_SF) == 0) + regs->link &= 0xffffffffUL; + } + if ((regs->msr & MSR_SF) == 0) + imm &= 0xffffffffUL; + regs->nip = imm; + return 1; + case 19: + switch (instr & 0x7fe) { + case 0x20: /* bclr */ + case 0x420: /* bcctr */ + imm = (instr & 0x400)? regs->ctr: regs->link; + regs->nip += 4; + if ((regs->msr & MSR_SF) == 0) { + regs->nip &= 0xffffffffUL; + imm &= 0xffffffffUL; + } + if (instr & 1) + regs->link = regs->nip; + if (branch_taken(instr, regs)) + regs->nip = imm; + return 1; + case 0x24: /* rfid, scary */ + return -1; + } + case 31: + rd = (instr >> 21) & 0x1f; + switch (instr & 0x7fe) { + case 0xa6: /* mfmsr */ + regs->gpr[rd] = regs->msr & MSR_MASK; + regs->nip += 4; + if ((regs->msr & MSR_SF) == 0) + regs->nip &= 0xffffffffUL; + return 1; + case 0x164: /* mtmsrd */ + /* only MSR_EE and MSR_RI get changed if bit 15 set */ + /* mtmsrd doesn't change MSR_HV and MSR_ME */ + imm = (instr & 0x10000)? 
0x8002: 0xefffffffffffefffUL; + imm = (regs->msr & MSR_MASK & ~imm) + | (regs->gpr[rd] & imm); + if ((imm & MSR_RI) == 0) + /* can't step mtmsrd that would clear MSR_RI */ + return -1; + regs->msr = imm; + regs->nip += 4; + if ((imm & MSR_SF) == 0) + regs->nip &= 0xffffffffUL; + return 1; + } + } + return 0; +} diff --git a/arch/powerpc/lib/strcase.c b/arch/powerpc/lib/strcase.c new file mode 100644 index 0000000..36b5210 --- /dev/null +++ b/arch/powerpc/lib/strcase.c @@ -0,0 +1,23 @@ +#include + +int strcasecmp(const char *s1, const char *s2) +{ + int c1, c2; + + do { + c1 = tolower(*s1++); + c2 = tolower(*s2++); + } while (c1 == c2 && c1 != 0); + return c1 - c2; +} + +int strncasecmp(const char *s1, const char *s2, int n) +{ + int c1, c2; + + do { + c1 = tolower(*s1++); + c2 = tolower(*s2++); + } while ((--n > 0) && c1 == c2 && c1 != 0); + return c1 - c2; +} diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S new file mode 100644 index 0000000..15d40e9e --- /dev/null +++ b/arch/powerpc/lib/string.S @@ -0,0 +1,203 @@ +/* + * String handling functions for PowerPC. + * + * Copyright (C) 1996 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include + + .text + .stabs "arch/powerpc/lib/",N_SO,0,0,0f + .stabs "string.S",N_SO,0,0,0f +0: + + .section __ex_table,"a" +#ifdef CONFIG_PPC64 + .align 3 +#define EXTBL .llong +#else + .align 2 +#define EXTBL .long +#endif + .text + +_GLOBAL(strcpy) + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + +/* This clears out any unused part of the destination buffer, + just as the libc version does. -- paulus */ +_GLOBAL(strncpy) + cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r6) + bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ + bnelr /* if we didn't hit a null char, we're done */ + mfctr r5 + cmpwi 0,r5,0 /* any space left in destination buffer? */ + beqlr /* we know r0 == 0 here */ +2: stbu r0,1(r6) /* clear it out if so */ + bdnz 2b + blr + +_GLOBAL(strcat) + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r5) + cmpwi 0,r0,0 + bne 1b + addi r5,r5,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + +_GLOBAL(strcmp) + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r5) + cmpwi 1,r3,0 + lbzu r0,1(r4) + subf. r3,r0,r3 + beqlr 1 + beq 1b + blr + +_GLOBAL(strlen) + addi r4,r3,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + bne 1b + subf r3,r3,r4 + blr + +_GLOBAL(memcmp) + cmpwi 0,r5,0 + ble- 2f + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r6) + lbzu r0,1(r4) + subf. r3,r0,r3 + bdnzt 2,1b + blr +2: li r3,0 + blr + +_GLOBAL(memchr) + cmpwi 0,r5,0 + ble- 2f + mtctr r5 + addi r3,r3,-1 +1: lbzu r0,1(r3) + cmpw 0,r0,r4 + bdnzf 2,1b + beqlr +2: li r3,0 + blr + +_GLOBAL(__clear_user) + addi r6,r3,-4 + li r3,0 + li r5,0 + cmplwi 0,r4,4 + blt 7f + /* clear a single word */ +11: stwu r5,4(r6) + beqlr + /* clear word sized chunks */ + andi. r0,r6,3 + add r4,r0,r4 + subf r6,r0,r6 + srwi r0,r4,2 + andi. 
r4,r4,3 + mtctr r0 + bdz 7f +1: stwu r5,4(r6) + bdnz 1b + /* clear byte sized chunks */ +7: cmpwi 0,r4,0 + beqlr + mtctr r4 + addi r6,r6,3 +8: stbu r5,1(r6) + bdnz 8b + blr +90: mr r3,r4 + blr +91: mfctr r3 + slwi r3,r3,2 + add r3,r3,r4 + blr +92: mfctr r3 + blr + + .section __ex_table,"a" + EXTBL 11b,90b + EXTBL 1b,91b + EXTBL 8b,92b + .text + +_GLOBAL(__strncpy_from_user) + addi r6,r3,-1 + addi r4,r4,-1 + cmpwi 0,r5,0 + beq 2f + mtctr r5 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r6) + bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ + beq 3f +2: addi r6,r6,1 +3: subf r3,r3,r6 + blr +99: li r3,-EFAULT + blr + + .section __ex_table,"a" + EXTBL 1b,99b + .text + +/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */ +_GLOBAL(__strnlen_user) + addi r7,r3,-1 + subf r6,r7,r5 /* top+1 - str */ + cmplw 0,r4,r6 + bge 0f + mr r6,r4 +0: mtctr r6 /* ctr = min(len, top - str) */ +1: lbzu r0,1(r7) /* get next byte */ + cmpwi 0,r0,0 + bdnzf 2,1b /* loop if --ctr != 0 && byte != 0 */ + addi r7,r7,1 + subf r3,r3,r7 /* number of bytes we have looked at */ + beqlr /* return if we found a 0 byte */ + cmpw 0,r3,r4 /* did we look at all len bytes? */ + blt 99f /* if not, must have hit top */ + addi r3,r4,1 /* return len + 1 to indicate no null found */ + blr +99: li r3,0 /* bad address, return 0 */ + blr + + .section __ex_table,"a" + EXTBL 1b,99b diff --git a/arch/powerpc/lib/usercopy.c b/arch/powerpc/lib/usercopy.c new file mode 100644 index 0000000..5eea6f3 --- /dev/null +++ b/arch/powerpc/lib/usercopy.c @@ -0,0 +1,41 @@ +/* + * Functions which are too large to be inlined. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include + +unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (likely(access_ok(VERIFY_READ, from, n))) + n = __copy_from_user(to, from, n); + else + memset(to, 0, n); + return n; +} + +unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (likely(access_ok(VERIFY_WRITE, to, n))) + n = __copy_to_user(to, from, n); + return n; +} + +unsigned long copy_in_user(void __user *to, const void __user *from, + unsigned long n) +{ + might_sleep(); + if (likely(access_ok(VERIFY_READ, from, n) && + access_ok(VERIFY_WRITE, to, n))) + n =__copy_tofrom_user(to, from, n); + return n; +} + +EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(copy_to_user); +EXPORT_SYMBOL(copy_in_user); + -- cgit v1.1
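
A note on the remote-heap interface in rheap.c above: rh_detach_region(), rh_alloc() and rh_alloc_fixed() report failure through ERR_PTR()-encoded values rather than NULL, and rh_free() returns the size handed back (or -EINVAL), so callers are expected to test with IS_ERR()/PTR_ERR() from linux/err.h. A minimal usage sketch, assuming an rh_info_t that was already set up with the creation/attach helpers earlier in rheap.c (not part of this hunk); the owner string and size are only illustrative:

#include <linux/err.h>		/* IS_ERR(), ERR_PTR() */

static void *rheap_usage_sketch(rh_info_t *heap)
{
	void *buf;
	int freed;

	/* size is rounded up to the heap's configured alignment */
	buf = rh_alloc(heap, 256, "sketch");
	if (IS_ERR(buf))
		return buf;		/* ERR_PTR(-EINVAL) or ERR_PTR(-ENOMEM) */

	rh_dump(heap);			/* print the free and taken lists */

	freed = rh_free(heap, buf);	/* returns the freed size, or -EINVAL */
	if (freed < 0)
		return ERR_PTR(freed);

	return NULL;
}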
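rh_get_stats() above always returns the total number of blocks on the requested list, which may exceed max_stats, so callers have to clamp the count exactly as rh_dump() does. A short sketch of such a caller (the array size and message format are arbitrary for the example):

static void rheap_report_taken(rh_info_t *heap)
{
	rh_stats_t st[16];
	int i, nr;

	nr = rh_get_stats(heap, RHGS_TAKEN, 16, st);
	if (nr > 16)
		nr = 16;		/* only 16 entries were actually filled in */

	for (i = 0; i < nr; i++)
		printk(KERN_INFO "  %p (+%d) %s\n",
		       st[i].start, st[i].size,
		       st[i].owner != NULL ? st[i].owner : "");
}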
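emulate_step() in sstep.c returns 1 when it has emulated the instruction (NIP, and where relevant LR/CTR/MSR, are already updated), 0 when the instruction is not one it handles, and -1 for instructions that must not be single-stepped (rfid, or an mtmsrd that would clear MSR_RI). A hypothetical caller on a single-step path might look like the sketch below; the two fallback helpers are invented names, and the instruction fetch assumes regs->nip points at kernel text:

static void single_step_sketch(struct pt_regs *regs)
{
	unsigned int instr = *(unsigned int *)regs->nip;

	switch (emulate_step(regs, instr)) {
	case 1:
		return;				/* emulated in place, no trap needed */
	case -1:
		refuse_single_step(regs);	/* hypothetical: rfid, or mtmsrd clearing MSR_RI */
		return;
	default:
		hw_single_step(regs);		/* hypothetical: set trace mode and resume */
		return;
	}
}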
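The displacement decode in the "case 18" (unconditional branch) path of emulate_step() is compact enough to be easy to misread: bits 2..25 of the instruction hold the byte displacement and bit 25 is its sign. A stand-alone, user-space illustration of the same arithmetic; the helper name and the sample encodings are only for the example:

#include <stdio.h>
#include <stdint.h>

static long branch_displacement(uint32_t instr)
{
	long imm = instr & 0x03fffffc;		/* LI field, already shifted left by 2 */
	if (imm & 0x02000000)			/* sign bit of the displacement */
		imm -= 0x04000000;
	return imm;
}

int main(void)
{
	printf("%ld\n", branch_displacement(0x48000000));	/* "b ."  ->  0 */
	printf("%ld\n", branch_displacement(0x4bfffffc));	/* "b -4" -> -4 */
	return 0;
}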
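The two mask constants in the mtmsrd case of emulate_step() decode to named MSR bits: with the L bit of the instruction set, only EE and RI may change; otherwise everything except HV and ME may change, matching the in-line comment. A stand-alone check of that reading, using the architected bit positions (RI=1, ME=12, EE=15, HV=60, counted from the least-significant bit, as in the kernel's MSR_*_LG definitions):

#include <stdio.h>

#define MSR_BIT(n)	(1ULL << (n))

int main(void)
{
	unsigned long long ee_ri = MSR_BIT(15) | MSR_BIT(1);	/* MSR_EE | MSR_RI */
	unsigned long long hv_me = MSR_BIT(60) | MSR_BIT(12);	/* MSR_HV | MSR_ME */

	printf("%d\n", ee_ri == 0x8002ULL);			/* prints 1 */
	printf("%d\n", ~hv_me == 0xefffffffffffefffULL);	/* prints 1 */
	return 0;
}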
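The return convention of __strnlen_user in string.S is worth spelling out, since three different outcomes are packed into one register. The following user-space model is an assumption drawn from reading the assembly, not a kernel API; it exists only to document the convention:

#include <stdio.h>
#include <stddef.h>

/* Model of __strnlen_user's result:
 *   0        -> fault, or the scan ran into 'top' before finding a NUL
 *   n <= len -> NUL found; n counts the terminating NUL byte
 *   len + 1  -> no NUL within the first 'len' bytes                    */
static size_t strnlen_user_model(const char *str, size_t len, const char *top)
{
	size_t scan = (size_t)(top - str) + 1;	/* bytes available up to 'top' */
	size_t i;

	if (len < scan)
		scan = len;
	for (i = 0; i < scan; i++)
		if (str[i] == '\0')
			return i + 1;
	return scan == len ? len + 1 : 0;
}

int main(void)
{
	char buf[8] = "abc";

	printf("%zu\n", strnlen_user_model(buf, 8, buf + 7));	/* 4 */
	printf("%zu\n", strnlen_user_model(buf, 2, buf + 7));	/* 3 */
	return 0;
}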
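Finally, the copy_from_user() wrapper in usercopy.c returns the number of bytes it could not copy and zero-fills the destination when access_ok() fails, so a caller only needs a single non-zero test. A hedged sketch of the usual calling pattern; struct example_args and the function name are invented for illustration:

#include <asm/uaccess.h>
#include <linux/errno.h>

struct example_args {
	unsigned long addr;
	unsigned long len;
};

static int fetch_example_args(struct example_args *dst, const void __user *src)
{
	/* non-zero means some bytes were left uncopied (or the access
	 * check failed and *dst is now all zeroes) */
	if (copy_from_user(dst, src, sizeof(*dst)))
		return -EFAULT;
	return 0;
}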