/* $NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $ */ /*- * Copyright (c) 2001 Ben Harris. * Copyright (c) 1994 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * RiscBSD kernel project * * blockio.S * * optimised block read/write from/to IO routines. 
*
 * Created      : 08/10/94
 * Modified	: 22/01/99  -- R.Earnshaw
 *		  Faster, and small tweaks for StrongARM
 */

#include <machine/asm.h>

__FBSDID("$FreeBSD$");

	.syntax	unified

/*
 * Read bytes from an I/O address into a block of memory
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length
 *
 * r0 is never advanced: every byte is fetched from the same I/O address.
 * r1 is post-incremented as bytes/words are stored.
 * r3 and r12 (ip) are used as scratch.
 *
 * Structure:
 *   1. If at least 4 bytes are requested and the destination is not word
 *      aligned, copy 1-3 single bytes until it is.
 *   2. Main loop: four byte reads are packed into one word (first byte in
 *      bits 0-7, fourth in bits 24-31) and stored with a single str.
 *   3. Tail: copy the remaining 0-3 bytes one at a time.
 */

/* This code will look very familiar if you've read _memcpy(). */
ENTRY(read_multi_1)
	/*
	 * Push {fp, old sp, lr, pc} and point fp at the topmost pushed word,
	 * so that "ldmdb fp, {fp, sp, pc}" later restores fp and sp and
	 * returns through the saved lr.
	 */
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	subs	r2, r2, #4		/* r2 = length - 4 */
	blt	.Lrm1_l4		/* less than 4 bytes */
	ands	r12, r1, #3
	beq	.Lrm1_main		/* aligned destination */
	rsb	r12, r12, #4		/* r12 = bytes needed to align (1-3) */
	cmp	r12, #2
	ldrb	r3, [r0]		/* always at least one byte */
	strb	r3, [r1], #1
	ldrbge	r3, [r0]		/* second byte if r12 >= 2 */
	strbge	r3, [r1], #1
	ldrbgt	r3, [r0]		/* third byte if r12 == 3 */
	strbgt	r3, [r1], #1
	subs	r2, r2, r12		/* account for the alignment bytes */
	blt	.Lrm1_l4		/* fewer than 4 bytes left */
.Lrm1_main:
.Lrm1loop:
	ldrb	r3, [r0]		/* byte 0 -> bits 0-7 */
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #8	/* byte 1 -> bits 8-15 */
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #16	/* byte 2 -> bits 16-23 */
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #24	/* byte 3 -> bits 24-31 */
	str	r3, [r1], #4
	subs	r2, r2, #4
	bge	.Lrm1loop
.Lrm1_l4:
	adds	r2, r2, #4		/* r2 = bytes still to copy */
	ldmdbeq	fp, {fp, sp, pc}	/* nothing left: unwind and return */
	cmp	r2, #2
	ldrb	r3, [r0]		/* always at least one byte */
	strb	r3, [r1], #1
	ldrbge	r3, [r0]		/* second byte if r2 >= 2 */
	strbge	r3, [r1], #1
	ldrbgt	r3, [r0]		/* third byte if r2 > 2 */
	strbgt	r3, [r1], #1
	ldmdb	fp, {fp, sp, pc}	/* unwind and return */
END(read_multi_1)

/*
 * Write bytes to an I/O address from a block of memory
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length
 */

/* This code will look very familiar if you've read _memcpy().
*/
/*
 * write_multi_1 layout: r0 (the I/O address) is never advanced, r1 is
 * post-incremented, r3 and r12 (ip) are scratch.  Up to three leading
 * bytes are written singly to word-align the source, then whole words are
 * loaded and written out one byte at a time (least-significant byte
 * first), then the 0-3 trailing bytes are written singly.
 */
ENTRY(write_multi_1)
	/*
	 * Push {fp, old sp, lr, pc} and point fp at the topmost pushed word,
	 * so that "ldmdb fp, {fp, sp, pc}" later restores fp and sp and
	 * returns through the saved lr.
	 */
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	subs	r2, r2, #4		/* r2 = length - 4 */
	blt	.Lwm1_l4		/* less than 4 bytes */
	ands	r12, r1, #3
	beq	.Lwm1_main		/* aligned source */
	rsb	r12, r12, #4		/* r12 = bytes needed to align (1-3) */
	cmp	r12, #2
	ldrb	r3, [r1], #1		/* always at least one byte */
	strb	r3, [r0]
	ldrbge	r3, [r1], #1		/* second byte if r12 >= 2 */
	strbge	r3, [r0]
	ldrbgt	r3, [r1], #1		/* third byte if r12 == 3 */
	strbgt	r3, [r0]
	subs	r2, r2, r12		/* account for the alignment bytes */
	blt	.Lwm1_l4		/* fewer than 4 bytes left */
.Lwm1_main:
.Lwm1loop:
	ldr	r3, [r1], #4		/* fetch one word from memory */
	strb	r3, [r0]		/* bits 0-7 */
	mov	r3, r3, lsr #8
	strb	r3, [r0]		/* bits 8-15 */
	mov	r3, r3, lsr #8
	strb	r3, [r0]		/* bits 16-23 */
	mov	r3, r3, lsr #8
	strb	r3, [r0]		/* bits 24-31 */
	subs	r2, r2, #4
	bge	.Lwm1loop
.Lwm1_l4:
	adds	r2, r2, #4		/* r2 = bytes still to copy */
	ldmdbeq	fp, {fp, sp, pc}	/* nothing left: unwind and return */
	cmp	r2, #2
	ldrb	r3, [r1], #1		/* always at least one byte */
	strb	r3, [r0]
	ldrbge	r3, [r1], #1		/* second byte if r2 >= 2 */
	strbge	r3, [r0]
	ldrbgt	r3, [r1], #1		/* third byte if r2 > 2 */
	strbgt	r3, [r0]
	ldmdb	fp, {fp, sp, pc}	/* unwind and return */
END(write_multi_1)

/*
 * Reads short ints (16 bits) from an I/O address into a block of memory
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (counted in 16-bit items: each slow-path iteration
 *      stores two bytes and subtracts one)
 *
 * r3 and ip are used as scratch; r0 is never advanced.
 */
ENTRY(insw)
	/* Make sure that we have a positive length (signed test) */
	cmp	r2, #0x00000000
	movle	pc, lr

	/*
	 * If the count is even and the destination address is word
	 * aligned, do it fast
	 */
	tst	r2, #0x00000001
	tsteq	r1, #0x00000003
	beq	.Lfastinsw

	/* Non aligned insw: one word read per item, stored as two bytes */
.Linswloop:
	ldr	r3, [r0]
	subs	r2, r2, #0x00000001	/* Loop test in load delay slot */
	strb	r3, [r1], #0x0001	/* low byte of the item */
	mov	r3, r3, lsr #8
	strb	r3, [r1], #0x0001	/* high byte of the item */
	bgt	.Linswloop		/* flags still from the subs above */

	RET

	/* Word aligned insw: two items per iteration, stored as one word */
.Lfastinsw:
.Lfastinswloop:
	/*
	 * NOTE(review): the load from r0 + 2 relies on how the target
	 * handles a word load from a non-word-aligned address -- confirm
	 * for the CPUs this is built for.
	 */
	ldr	r3, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	ip, [r0]
	mov	r3, r3, lsr #16		/* Put the two shorts together */
	orr	r3, r3, ip, lsl #16	/* 1st read low, 2nd read high */
	str	r3, [r1], #0x0004	/* Store */
	subs	r2, r2, #0x00000002	/* Next */
	bgt	.Lfastinswloop

	RET
END(insw)

/*
 * Writes short ints (16 bits) from a block of memory to an I/O address
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (counted in 16-bit items)
 *
 * Every word written to the I/O address carries the 16-bit item in both
 * its low and its high half.  r3 and ip are used as scratch.
 */
ENTRY(outsw)
	/* Make sure that we have a positive length (signed test) */
	cmp	r2, #0x00000000
	movle	pc, lr

	/*
	 * If the count is even and the source address is word aligned,
	 * do it fast
	 */
	tst	r2, #0x00000001
	tsteq	r1, #0x00000003
	beq	.Lfastoutsw

	/* Non aligned outsw: assemble each item from two byte loads */
.Loutswloop:
	ldrb	r3, [r1], #0x0001	/* low byte */
	ldrb	ip, [r1], #0x0001	/* high byte */
	subs	r2, r2, #0x00000001	/* Loop test in load delay slot */
	orr	r3, r3, ip, lsl #8	/* r3 = 16-bit item */
	orr	r3, r3, r3, lsl #16	/* duplicate into the top half */
	str	r3, [r0]
	bgt	.Loutswloop		/* flags still from the subs above */

	RET

	/* Word aligned outsw: one word load yields two items */
.Lfastoutsw:
.Lfastoutswloop:
	ldr	r3, [r1], #0x0004	/* r3 = (H)(L) */
	subs	r2, r2, #0x00000002	/* Loop test in load delay slot */

	eor	ip, r3, r3, lsr #16	/* ip = (H)(H^L) */
	eor	r3, r3, ip, lsl #16	/* r3 = (H^H^L)(L) = (L)(L) */
	eor	ip, ip, r3, lsr #16	/* ip = (H)(H^L^L) = (H)(H) */

	str	r3, [r0]		/* low item first */
	str	ip, [r0]		/* then high item */

/*	mov	ip, r3, lsl #16
 *	orr	ip, ip, ip, lsr #16
 *	str	ip, [r0]
 *
 *	mov	ip, r3, lsr #16
 *	orr	ip, ip, ip, lsl #16
 *	str	ip, [r0]
 */

	bgt	.Lfastoutswloop		/* flags still from the subs above */

	RET
END(outsw)

/*
 * reads short ints (16 bits) from an I/O address into a block of memory
 * with a length expected to be a multiple of 16 bytes (8 items)
 * with a word aligned destination address
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (counted in 16-bit items)
 *
 * If either expectation does not hold, control is passed to insw with the
 * arguments untouched.  r3, ip and lr are scratch; r4, r5 and lr are
 * saved on the stack for the duration of the fast path.
 */
ENTRY(insw16)
	/* Make sure that we have a positive length (signed test) */
	cmp	r2, #0x00000000
	movle	pc, lr

	/*
	 * If the destination address is word aligned and the count is a
	 * multiple of 8, do it fast; otherwise hand over to insw
	 */
	tst	r2, #0x00000007
	tsteq	r1, #0x00000003
	bne	_C_LABEL(insw)

	/* Word aligned insw */
	stmfd	sp!, {r4,r5,lr}

	/*
	 * Each group below reads two items and packs them into one word
	 * (first read in the low half, second read in the high half).
	 * NOTE(review): the loads from r0 + 2 rely on how the target
	 * handles a word load from a non-word-aligned address -- confirm.
	 */
.Linsw16loop:
	ldr	r3, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	r3, r3, lsr #16		/* Put the two shorts together */
	orr	r3, r3, lr, lsl #16

	ldr	r4, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	r4, r4, lsr #16		/* Put the two shorts together */
	orr	r4, r4, lr, lsl #16

	ldr	r5, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	r5, r5, lsr #16		/* Put the two shorts together */
	orr	r5, r5, lr, lsl #16

	ldr	ip, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	ip, ip, lsr #16		/* Put the two shorts together */
	orr	ip, ip, lr, lsl #16

	stmia	r1!, {r3-r5,ip}		/* store 4 words = 8 items */
	subs	r2, r2, #0x00000008	/* Next */
	bgt	.Linsw16loop

	ldmfd	sp!, {r4,r5,pc}		/* Restore regs and go home */
END(insw16)

/*
 * Writes short ints (16 bits) from a block of memory to an I/O address
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (counted in 16-bit items)
 *
 * Fast path requires a word aligned source and a count that is a
 * multiple of 8; otherwise control is passed to outsw with the arguments
 * untouched.  Every word written carries the item in both halves.
 * r3, ip and lr are scratch; r4, r5 and lr are saved on the stack.
 */
ENTRY(outsw16)
	/* Make sure that we have a positive length (signed test) */
	cmp	r2, #0x00000000
	movle	pc, lr

	/*
	 * If the source address is word aligned and the count is a
	 * multiple of 8, do it fast; otherwise hand over to outsw
	 */
	tst	r2, #0x00000007
	tsteq	r1, #0x00000003
	bne	_C_LABEL(outsw)

	/* Word aligned outsw */
	stmfd	sp!, {r4,r5,lr}

.Loutsw16loop:
	ldmia	r1!, {r4,r5,ip,lr}	/* 4 words = 8 items, each (A)(B) */

	eor	r3, r4, r4, lsl #16	/* r3 = (A^B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, r4, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]		/* low item first */
	str	r4, [r0]		/* then high item */

/*	mov	r3, r4, lsl #16
 *	orr	r3, r3, r3, lsr #16
 *	str	r3, [r0]
 *
 *	mov	r3, r4, lsr #16
 *	orr	r3, r3, r3, lsl #16
 *	str	r3, [r0]
 */

	eor	r3, r5, r5, lsl #16	/* r3 = (A^B)(B) */
	eor	r5, r5, r3, lsr #16	/* r5 = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, r5, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	r5, [r0]

	eor	r3, ip, ip, lsl #16	/* r3 = (A^B)(B) */
	eor	ip, ip, r3, lsr #16	/* ip = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, ip, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	ip, [r0]

	eor	r3, lr, lr, lsl #16	/* r3 = (A^B)(B) */
	eor	lr, lr, r3, lsr #16	/* lr = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, lr, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	lr, [r0]

	subs	r2, r2, #0x00000008
	bgt	.Loutsw16loop

	ldmfd	sp!, {r4,r5,pc}		/* and go home */
END(outsw16)

/*
 * reads short ints (16 bits) from an I/O address into a block of memory
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words.
 * The destination address should be word aligned.
*
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (counted in 16-bit items)
 *
 * Each word fetched by ldmia supplies one item in its low 16 bits; pairs
 * of items are packed into one word (first in the low half, second in the
 * high half) before being stored.  Work proceeds in chunks of 8, 4, 2 and
 * finally 1 item.  r3-r9, ip and lr are used; r4-r9 and lr are saved on
 * the stack.
 */
ENTRY(inswm8)
	/* Make sure that we have a positive length (signed test) */
	cmp	r2, #0x00000000
	movle	pc, lr

	/*
	 * If the destination address is not word aligned, hand over to
	 * insw with the arguments untouched
	 */
	tst	r1, #0x00000003
	bne	_C_LABEL(insw)

	/* Word aligned insw */
	stmfd	sp!, {r4-r9,lr}

	mov	lr, #0xff000000
	orr	lr, lr, #0x00ff0000	/* lr = 0xffff0000: top-half mask */

.Linswm8_loop8:
	cmp	r2, #8
	bcc	.Linswm8_l8		/* fewer than 8 items left */

	ldmia	r0, {r3-r9,ip}		/* 8 words, one item in each */

	bic	r3, r3, lr		/* keep low 16 bits of 1st ... */
	orr	r3, r3, r4, lsl #16	/* ... 2nd goes in the high half */
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16
	bic	r7, r7, lr
	orr	r5, r7, r8, lsl #16
	bic	r9, r9, lr
	orr	r6, r9, ip, lsl #16

	stmia	r1!, {r3-r6}		/* store 4 words = 8 items */

	subs	r2, r2, #0x00000008	/* Next */
	bne	.Linswm8_loop8
	beq	.Linswm8_l1		/* always taken: count hit zero */

.Linswm8_l8:
	cmp	r2, #4
	bcc	.Linswm8_l4		/* fewer than 4 items left */

	ldmia	r0, {r3-r6}		/* 4 words, one item in each */

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16

	stmia	r1!, {r3-r4}		/* store 2 words = 4 items */

	subs	r2, r2, #0x00000004
	beq	.Linswm8_l1

.Linswm8_l4:
	cmp	r2, #2
	bcc	.Linswm8_l2		/* fewer than 2 items left */

	ldmia	r0, {r3-r4}		/* 2 words, one item in each */

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	str	r3, [r1], #0x0004	/* store 1 word = 2 items */

	subs	r2, r2, #0x00000002
	beq	.Linswm8_l1

.Linswm8_l2:
	cmp	r2, #1
	bcc	.Linswm8_l1		/* nothing left */

	ldr	r3, [r0]
	subs	r2, r2, #0x00000001	/* Test in load delay slot */
					/* XXX, why don't we use result?  */

	strb	r3, [r1], #0x0001	/* low byte of the last item */
	mov	r3, r3, lsr #8
	strb	r3, [r1], #0x0001	/* high byte of the last item */

.Linswm8_l1:
	ldmfd	sp!, {r4-r9,pc}		/* And go home */
END(inswm8)

/*
 * write short ints (16 bits) to an I/O address from a block of memory
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words.
 * The source address should be word aligned.
*
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (counted in 16-bit items)
 *
 * Each memory word (A)(B) holds two items.  It is expanded into two
 * output words, (B)(B) then (A)(A), so every word written carries one
 * item in both halves.  Work proceeds in chunks of 8, 4, 2 and finally 1
 * item.  r3-r8, ip and lr are used; r4-r8 and lr are saved on the stack.
 */
ENTRY(outswm8)
	/* Make sure that we have a positive length (signed test) */
	cmp	r2, #0x00000000
	movle	pc, lr

	/*
	 * If the source address is not word aligned, hand over to outsw
	 * with the arguments untouched
	 */
	tst	r1, #0x00000003
	bne	_C_LABEL(outsw)

	/* Word aligned outsw */
	stmfd	sp!, {r4-r8,lr}

.Loutswm8_loop8:
	cmp	r2, #8
	bcc	.Loutswm8_l8		/* fewer than 8 items left */

	ldmia	r1!, {r3,r5,r7,ip}	/* 4 words = 8 items */

	eor	r4, r3, r3, lsr #16	/* r4 = (A)(A^B) */
	eor	r3, r3, r4, lsl #16	/* r3 = (A^A^B)(B) = (B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(B^A^B) = (A)(A) */

	eor	r6, r5, r5, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r5, r6, lsl #16	/* r5 = (A^A^B)(B) = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(B^A^B) = (A)(A) */

	eor	r8, r7, r7, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r7, r8, lsl #16	/* r7 = (A^A^B)(B) = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(B^A^B) = (A)(A) */

	eor	lr, ip, ip, lsr #16	/* lr = (A)(A^B) */
	eor	ip, ip, lr, lsl #16	/* ip = (A^A^B)(B) = (B)(B) */
	eor	lr, lr, ip, lsr #16	/* lr = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r3-r8,ip,lr}	/* 8 words out, r0 not advanced */

	subs	r2, r2, #0x00000008	/* Next */
	bne	.Loutswm8_loop8
	beq	.Loutswm8_l1		/* always taken: count hit zero */

.Loutswm8_l8:
	cmp	r2, #4
	bcc	.Loutswm8_l4		/* fewer than 4 items left */

	ldmia	r1!, {r3-r4}		/* 2 words = 4 items */

	eor	r6, r3, r3, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r3, r6, lsl #16	/* r5 = (A^A^B)(B) = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(B^A^B) = (A)(A) */

	eor	r8, r4, r4, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r4, r8, lsl #16	/* r7 = (A^A^B)(B) = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r5-r8}		/* 4 words out */

	subs	r2, r2, #0x00000004
	beq	.Loutswm8_l1

.Loutswm8_l4:
	cmp	r2, #2
	bcc	.Loutswm8_l2		/* fewer than 2 items left */

	ldr	r3, [r1], #0x0004	/* r3 = (A)(B) */
	subs	r2, r2, #0x00000002	/* Done test in Load delay slot */

	eor	r5, r3, r3, lsr #16	/* r5 = (A)(A^B) */
	eor	r4, r3, r5, lsl #16	/* r4 = (A^A^B)(B) = (B)(B) */
	eor	r5, r5, r4, lsr #16	/* r5 = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r4, r5}		/* 2 words out */

	beq	.Loutswm8_l1		/* flags still from the subs above */

.Loutswm8_l2:
	cmp	r2, #1
	bcc	.Loutswm8_l1		/* nothing left */

	ldrb	r3, [r1], #0x0001	/* low byte of the last item */
	ldrb	r4, [r1], #0x0001	/* high byte of the last item */
	subs	r2, r2, #0x00000001	/* Done test in load delay slot */
					/* XXX This test isn't used?  */
	orr	r3, r3, r4, lsl #8	/* r3 = 16-bit item */
	orr	r3, r3, r3, lsl #16	/* duplicate into the top half */
	str	r3, [r0]

.Loutswm8_l1:
	ldmfd	sp!, {r4-r8,pc}		/* And go home */
END(outswm8)