[PATCH] CRIS update: new subarchitecture v32

New CRIS sub architecture named v32. From: Dave Jones <davej@redhat.com> Fix swapped kmalloc args Signed-off-by: Mikael Starvik <starvik@axis.com> Signed-off-by: Dave Jones <davej@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
author: Mikael Starvik <mikael.starvik@axis.com> 2005-07-27 11:44:44 -0700
committer: Linus Torvalds <torvalds@g5.osdl.org> 2005-07-27 16:26:01 -0700
commit: 51533b615e605d86154ec1b4e585c8ca1b0b15b7 (patch)
tree: 4a6d7d8494d2017632d83624fb71b36031e0e7e5 /arch/cris/arch-v32/lib/checksum.S
parent: 5d01e6ce785884a5db5792cd2e5bb36fa82fe23c (diff)
download: op-kernel-dev-51533b615e605d86154ec1b4e585c8ca1b0b15b7.zip
op-kernel-dev-51533b615e605d86154ec1b4e585c8ca1b0b15b7.tar.gz
1 files changed, 111 insertions, 0 deletions
diff --git a/arch/cris/arch-v32/lib/checksum.S b/arch/cris/arch-v32/lib/checksum.S
new file mode 100644
index 0000000..32e6618
--- /dev/null
+++ b/arch/cris/arch-v32/lib/checksum.S
@@ -0,0 +1,111 @@
+/*
+ * A fast checksum routine using movem
+ * Copyright (c) 1998-2001, 2003 Axis Communications AB
+ *
+ * csum_partial(const unsigned char * buff, int len, unsigned int sum)
+ */
+
+	.globl	csum_partial
+csum_partial:
+
+	;; r10 - src
+	;; r11 - length
+	;; r12 - checksum
+
+	;; check for breakeven length between movem and normal word looping versions
+	;; we also do _NOT_ want to compute a checksum over more than the
+	;; actual length when length < 40
+
+	cmpu.w	80,$r11
+	blo	_word_loop
+	nop
+
+	;; need to save the registers we use below in the movem loop
+	;; this overhead is why we have a check above for breakeven length
+	;; only r0 - r8 have to be saved, the other ones are clobber-able
+	;; according to the ABI
+
+	subq	9*4,$sp
+	subq	10*4,$r11	; update length for the first loop
+	movem	$r8,[$sp]
+
+	;; do a movem checksum
+
+_mloop:	movem	[$r10+],$r9	; read 10 longwords
+
+	;; perform dword checksumming on the 10 longwords
+
+	add.d	$r0,$r12
+	addc	$r1,$r12
+	addc	$r2,$r12
+	addc	$r3,$r12
+	addc	$r4,$r12
+	addc	$r5,$r12
+	addc	$r6,$r12
+	addc	$r7,$r12
+	addc	$r8,$r12
+	addc	$r9,$r12
+
+	;; fold the carry into the checksum, to avoid having to loop the carry
+	;; back into the top
+
+	addc	0,$r12
+	addc	0,$r12		; do it again, since we might have generated a carry
+
+	subq	10*4,$r11
+	bge	_mloop
+	nop
+
+	addq	10*4,$r11	; compensate for last loop underflowing length
+
+	movem	[$sp+],$r8	; restore regs
+
+_word_loop:
+	;; only fold if there is anything to fold.
+
+	cmpq	0,$r12
+	beq	_no_fold
+
+	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below.
+	;; r9 and r13 can be used as temporaries.
+
+	moveq	-1,$r9		; put 0xffff in r9, faster than move.d 0xffff,r9
+	lsrq	16,$r9
+
+	move.d	$r12,$r13
+	lsrq	16,$r13		; r13 = checksum >> 16
+	and.d	$r9,$r12		; checksum = checksum & 0xffff
+	add.d	$r13,$r12		; checksum += r13
+	move.d	$r12,$r13		; do the same again, maybe we got a carry last add
+	lsrq	16,$r13
+	and.d	$r9,$r12
+	add.d	$r13,$r12
+
+_no_fold:
+	cmpq	2,$r11
+	blt	_no_words
+	nop
+
+	;; checksum the rest of the words
+
+	subq	2,$r11
+
+_wloop:	subq	2,$r11
+	bge	_wloop
+	addu.w	[$r10+],$r12
+
+	addq	2,$r11
+
+_no_words:
+	;; see if we have one odd byte more
+	cmpq	1,$r11
+	beq	_do_byte
+	nop
+	ret
+	move.d	$r12,$r10
+
+_do_byte:
+	;; copy and checksum the last byte
+	addu.b	[$r10],$r12
+	ret
+	move.d	$r12,$r10
author	Mikael Starvik <mikael.starvik@axis.com>	2005-07-27 11:44:44 -0700
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-07-27 16:26:01 -0700
commit	51533b615e605d86154ec1b4e585c8ca1b0b15b7 (patch)
tree	4a6d7d8494d2017632d83624fb71b36031e0e7e5 /arch/cris/arch-v32/lib/checksum.S
parent	5d01e6ce785884a5db5792cd2e5bb36fa82fe23c (diff)
download	op-kernel-dev-51533b615e605d86154ec1b4e585c8ca1b0b15b7.zip op-kernel-dev-51533b615e605d86154ec1b4e585c8ca1b0b15b7.tar.gz