diff options
Diffstat (limited to 'sys/mips/rmi/xlr_csum_nocopy.S')
-rw-r--r-- | sys/mips/rmi/xlr_csum_nocopy.S | 217 |
1 files changed, 217 insertions, 0 deletions
/*
 * sys/mips/rmi/xlr_csum_nocopy.S
 *
 * Provenance: reconstructed from a patch listing whose header read
 *   diff --git a/sys/mips/rmi/xlr_csum_nocopy.S b/sys/mips/rmi/xlr_csum_nocopy.S
 *   new file mode 100644, index 0000000..8b51a7f
 * The diff '+' markers have been stripped and the statements put back on
 * one line each; instruction order is unchanged (the code relies on branch
 * delay slots under ".set noreorder").
 */

#include <machine/asm.h>


/*
 * Register contract for xlr_csum_partial_nocopy:
 *
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 * a3: dst (named below, but no instruction in this file reads or writes
 *     through it, and no store instruction is issued anywhere)
 *
 * v0: result (accumulator while running, folded sum on return)
 *
 * Scratch registers written by this file: v1, t0, t1, t2, t3, t7, t8,
 * plus a0 and a1 themselves, which are advanced/decremented in place.
 *
 * The additions are emitted as raw ".word" values annotated "daddwc" --
 * presumably an RMI XLR specific doubleword add-with-carry; confirm against
 * the processor manual.  Each word has rs = rd = 2 (v0) and an rt field of
 * 8, 9, 11, 6, 3 or 0, which the annotations call t0, t1, t3, a2, v1 and
 * zero.
 * NOTE(review): that naming matches the o32 register numbering; if this is
 * assembled for an ABI where t0/t1/t3 are not $8/$9/$11, the ld/lw targets
 * and the hand-encoded operands would disagree -- verify.
 */

#define src a0
#define dst a3
#define sum v0

	.text
	.set	noreorder

/*
 * CSUM_BIGCHUNK_AND_COPY offset
 *
 * Accumulate the 32 bytes at a0 + offset into v0: one prefetch (hint 0)
 * followed by four doubleword loads, each added with daddwc.
 * Clobbers t0 and t1.  Does not advance a0.  Despite the name, nothing is
 * stored.
 */
	.macro CSUM_BIGCHUNK_AND_COPY offset
	pref	0, (\offset+0x0)(a0)
	ld	t0, (\offset+0x00)(a0)
	ld	t1, (\offset+0x08)(a0)
	.word	0x70481038		/* daddwc v0, v0, t0 */
	.word	0x70491038		/* daddwc v0, v0, t1 */
	ld	t0, (\offset + 0x10)(a0)
	ld	t1, (\offset + 0x18)(a0)
	.word	0x70481038		/* daddwc v0, v0, t0 */
	.word	0x70491038		/* daddwc v0, v0, t1 */
	.endm

/*
 * small_csumcpy: common tail and exit path.
 *
 * Entered with t2 = number of bytes still to sum (bits 2..0 are examined,
 * so at most 7 bytes are consumed), src at any alignment, and the running
 * total in v0.  Consumes an optional word, halfword and byte, adds the
 * caller's partial checksum (a2), folds the 64-bit accumulator to 32 and
 * then to 16 bits, and returns to the caller with the result in v0.
 * The result is not complemented here.
 */
small_csumcpy:				/* unknown src alignment and < 8 bytes to go */
	move	a1, t2			/* a1 = remaining byte count */

	andi	t0, a1, 4
	beqz	t0, 1f			/* no full word left */
	andi	t0, a1, 2		/* delay slot: t0 = halfword-left flag */

	ulw	t1, (src)		/* Still a full word to go */
	daddiu	src, 4
	.word	0x70491038		/* daddwc v0, v0, t1 */

1:	move	t1, zero		/* t1 collects the halfword/byte tail */
	beqz	t0, 1f			/* no halfword left */
	andi	t0, a1, 1		/* delay slot: t0 = byte-left flag */

	ulhu	t1, (src)		/* Still a halfword to go */
	daddiu	src, 2

1:	beqz	t0, 1f			/* no trailing byte */
	sll	t1, t1, 16		/* delay slot: always runs, halfword to bits 31..16 */

	lbu	t2, (src)
	nop

#ifdef __MIPSEB__
	sll	t2, t2, 8		/* big-endian: last byte is the high half of its 16-bit unit */
#endif
	or	t1, t2

1:	.word	0x70491038		/* daddwc v0, v0, t1 */

	.word	0x70461038		/* daddwc v0, v0, a2 -- add caller's partial checksum */
	.word	0x70401038		/* daddwc v0, v0, $0 */

	/* Ideally at this point of time the status flag must be cleared */

	/* fold 64 -> 32 bits: add the low word, moved to the high half, onto sum */
	dsll32	v1, sum, 0
	.word	0x70431038		/* daddwc v0, v0, v1 */
	dsrl32	sum, sum, 0
	.word	0x70401038		/* daddwc v0, v0, zero */

	/* fold the checksum (32 -> 16 bits, carry recovered with sltu) */
	sll	v1, sum, 16
	addu	sum, v1
	sltu	v1, sum, v1
	srl	sum, sum, 16
	addu	sum, v1
1:
	.set	reorder			/* let the assembler fill the jr delay slot */
	jr	ra
	.set	noreorder

/* ------------------------------------------------------------------ */

/*
 * xlr_csum_partial_nocopy
 *
 * Sums a1 bytes starting at a0 into v0 (see the register contract at the
 * top of the file).  Buffers shorter than 8 bytes go straight to
 * small_csumcpy.  Longer ones are first brought up to 2-, 4-, 8-, 16- and
 * 32-byte alignment, then consumed in 128-, 64- and 32-byte chunks and
 * single words; the final 0..3 bytes, the addition of a2 and the fold are
 * done by small_csumcpy, which also performs the return.
 *
 * NOTE(review): t7 records whether the start address was odd, but it is
 * only read by the first branch at hword_align; nothing later adjusts the
 * result for an odd start.  Verify odd-aligned buffers against a reference
 * checksum.
 */
	.align	5
LEAF(xlr_csum_partial_nocopy)
	move	sum, zero
	move	t7, zero

	sltiu	t8, a1, 0x8
	bnez	t8, small_csumcpy	/* < 8 bytes to copy */
	move	t2, a1			/* delay slot: t2 = length for small_csumcpy */

	beqz	a1, out			/* a1 >= 8 here, so this is never taken */
	andi	t7, src, 0x1		/* delay slot: odd buffer? */

hword_align:
	beqz	t7, word_align
	andi	t8, src, 0x2		/* delay slot */

	lbu	t0, (src)		/* odd start: take one byte */
	dsubu	a1, a1, 0x1
	.word	0x70481038		/* daddwc v0, v0, t0 */
	daddu	src, src, 0x1
	andi	t8, src, 0x2

word_align:
	beqz	t8, dword_align
	sltiu	t8, a1, 56		/* delay slot: t8 = (a1 < 56) */

	lhu	t0, (src)
	dsubu	a1, a1, 0x2
	.word	0x70481038		/* daddwc v0, v0, t0 */
	sltiu	t8, a1, 56		/* recompute with the reduced length */
	daddu	src, src, 0x2

dword_align:
	bnez	t8, do_end_words	/* under 56 bytes: word loop only */
	move	t8, a1			/* delay slot: do_end_words derives the word count from t8 */

	andi	t8, src, 0x4
	beqz	t8, qword_align
	andi	t8, src, 0x8		/* delay slot */

	lw	t0, 0x00(src)
	dsubu	a1, a1, 0x4
	.word	0x70481038		/* daddwc v0, v0, t0 */
	daddu	src, src, 0x4
	andi	t8, src, 0x8

qword_align:
	beqz	t8, oword_align
	andi	t8, src, 0x10		/* delay slot */

	ld	t0, 0x00(src)
	dsubu	a1, a1, 0x8
	.word	0x70481038		/* daddwc v0, v0, t0 */
	daddu	src, src, 0x8
	andi	t8, src, 0x10

oword_align:
	beqz	t8, begin_movement
	dsrl	t8, a1, 0x7		/* delay slot: t8 = number of 128-byte chunks */

	ld	t3, 0x08(src)
	ld	t0, 0x00(src)
	.word	0x704b1038		/* daddwc v0, v0, t3 */
	.word	0x70481038		/* daddwc v0, v0, t0 */
	dsubu	a1, a1, 0x10
	daddu	src, src, 0x10
	dsrl	t8, a1, 0x7		/* recompute chunk count with the reduced length */

begin_movement:
	beqz	t8, 1f			/* no 128-byte chunk */
	andi	t2, a1, 0x40		/* delay slot: t2 = 64-byte-chunk flag */

move_128bytes:
	pref	0, 0x20(a0)
	pref	0, 0x40(a0)
	pref	0, 0x60(a0)
	CSUM_BIGCHUNK_AND_COPY(0x00)
	CSUM_BIGCHUNK_AND_COPY(0x20)
	CSUM_BIGCHUNK_AND_COPY(0x40)
	CSUM_BIGCHUNK_AND_COPY(0x60)
	dsubu	t8, t8, 0x01
	bnez	t8, move_128bytes	/* flag */
	daddu	src, src, 0x80		/* delay slot: advance every iteration */

1:
	beqz	t2, 1f			/* no 64-byte chunk */
	andi	t2, a1, 0x20		/* delay slot: t2 = 32-byte-chunk flag */

move_64bytes:
	pref	0, 0x20(a0)
	pref	0, 0x40(a0)
	CSUM_BIGCHUNK_AND_COPY(0x00)
	CSUM_BIGCHUNK_AND_COPY(0x20)
	daddu	src, src, 0x40

1:
	beqz	t2, do_end_words	/* no 32-byte chunk */
	andi	t8, a1, 0x1c		/* delay slot: t8 = bytes left in whole words */

move_32bytes:
	pref	0, 0x20(a0)
	CSUM_BIGCHUNK_AND_COPY(0x00)
	andi	t8, a1, 0x1c
	daddu	src, src, 0x20

do_end_words:
	beqz	t8, maybe_end_cruft
	dsrl	t8, t8, 0x2		/* delay slot: byte count -> word count */

end_words:
	lw	t0, (src)
	dsubu	t8, t8, 0x1
	.word	0x70481038		/* daddwc v0, v0, t0 */
	bnez	t8, end_words
	daddu	src, src, 0x4		/* delay slot: advance every iteration */

maybe_end_cruft:
	andi	t2, a1, 0x3		/* t2 = trailing bytes (0..3) */

small_memcpy:
	/*
	 * Hand the tail to small_csumcpy, which also adds a2, folds the sum
	 * and returns.  The byte loop that used to follow this jump could
	 * never execute (and tested a2, which it never modified), so it has
	 * been dropped.
	 */
	j	small_csumcpy
	move	a1, t2			/* delay slot */

out:
	jr	ra
	move	v0, sum			/* delay slot */
	END(xlr_csum_partial_nocopy)