diff options
Diffstat (limited to 'arch/microblaze/lib/muldi3.S')
-rw-r--r-- | arch/microblaze/lib/muldi3.S | 121 |
1 files changed, 121 insertions, 0 deletions
/*
 * arch/microblaze/lib/muldi3.S
 */
#include <linux/linkage.h>

/*
 * __muldi3 - multiply two 64 bit integers, for devices with hard multiply
 *
 *	Input :	Operand1[H] in Reg r5
 *		Operand1[L] in Reg r6
 *		Operand2[H] in Reg r7
 *		Operand2[L] in Reg r8
 *	Output:	Result[H] in Reg r3
 *		Result[L] in Reg r4
 *	Clobbers: r7 - r12 and the carry flag.  r20 - r27 are used as
 *		working registers but are saved and restored.
 *
 * Explanation:
 *
 *	Both the input numbers are divided into 16 bit numbers as follows
 *		op1 = A B C D
 *		op2 = E F G H
 *	result = D * H
 *		 + (C * H + D * G) << 16
 *		 + (B * H + C * G + D * F) << 32
 *		 + (A * H + B * G + C * F + D * E) << 48
 *
 *	Only 64 bits of the output are considered, so every partial product
 *	only needs its low 32 bits and the plain 32 bit "mul" is sufficient.
 *
 *	No shift instructions are used.  16 bit fields are extracted by
 *	storing a word to the stack and reading it back one halfword at a
 *	time.  The result is assembled in an 8 byte buffer made of four
 *	halfwords, named Pos0 (bits 63..48) to Pos3 (bits 15..0).
 *
 * NOTE(review): the halfword offsets below only match the A..H / Pos0..Pos3
 * labels when the most significant halfword of a word sits at the lower
 * address (big-endian layout).  Confirm before building for a little-endian
 * core.
 *
 * Frame layout (offsets from r1 after the frame is allocated):
 *	 0 .. 31	saved r20 - r27
 *	32 .. 35	Result[H]  (Pos0 at 32, Pos1 at 34)
 *	36 .. 39	Result[L]  (Pos2 at 36, Pos3 at 38)
 *	44 .. 59	copies of r5 - r8, written above this frame, i.e. into
 *			the caller's stack area (presumably the argument home
 *			slots the caller reserves - TODO confirm against the
 *			MicroBlaze ABI)
 */

#define FRAME_SZ	40
#define RES_HI		32
#define RES_LO		36
#define OP1_HI		(FRAME_SZ + 4)
#define OP1_LO		(FRAME_SZ + 8)
#define OP2_HI		(FRAME_SZ + 12)
#define OP2_LO		(FRAME_SZ + 16)

	.text
	.globl	__muldi3
	.type	__muldi3, @function
	.ent	__muldi3

__muldi3:
	addi	r1, r1, -FRAME_SZ

/* Spill the operands so they can be re-read 16 bits at a time */
	swi	r5, r1, OP1_HI
	swi	r6, r1, OP1_LO
	swi	r7, r1, OP2_HI
	swi	r8, r1, OP2_LO

/* Save the registers that will hold A thru H */
	swi	r20, r1, 0
	swi	r21, r1, 4
	swi	r22, r1, 8
	swi	r23, r1, 12
	swi	r24, r1, 16
	swi	r25, r1, 20
	swi	r26, r1, 24
	swi	r27, r1, 28

/* Load all the 16 bit values for A thru H (zero extended) */
	lhui	r20, r1, OP1_HI		/* A */
	lhui	r21, r1, OP1_HI + 2	/* B */
	lhui	r22, r1, OP1_LO		/* C */
	lhui	r23, r1, OP1_LO + 2	/* D */
	lhui	r24, r1, OP2_HI		/* E */
	lhui	r25, r1, OP2_HI + 2	/* F */
	lhui	r26, r1, OP2_LO		/* G */
	lhui	r27, r1, OP2_LO + 2	/* H */

/* Term 0: D * H ==> Pos2 and Pos3 (Pos2 is refined by the next term) */
	mul	r9, r23, r27
	swi	r9, r1, RES_LO

/*
 * Term 1: Hi16(Term 0) + C * H + D * G
 *	low 16 bits  ==> final Pos2
 *	next 16 bits ==> r11, carried into Term 2
 *	bit 32       ==> r12; it has weight 2^48, so it is added to Pos0
 *			 at the very end
 */
	lhui	r11, r1, RES_LO		/* Hi16(Term 0) */
	mul	r9, r22, r27		/* C * H */
	mul	r10, r23, r26		/* D * G */
	add	r9, r9, r10
	addc	r12, r0, r0		/* r12 = carry out of first add */
	add	r9, r9, r11
	addc	r12, r12, r0		/* ... plus carry out of second add */
	shi	r9, r1, RES_LO		/* Store Pos2 */
	swi	r9, r1, RES_HI		/* bounce through memory ... */
	lhui	r11, r1, RES_HI		/* ... to get bits 31..16 of Term 1 */

/* Term 2: Hi16(Term 1) + B * H + C * G + D * F ==> Pos0 and Pos1 */
	mul	r9, r21, r27		/* B * H */
	mul	r10, r22, r26		/* C * G */
	mul	r7, r23, r25		/* D * F */
	add	r9, r9, r11
	add	r9, r9, r10
	add	r9, r9, r7
	swi	r9, r1, RES_HI		/* Pos1 final, Pos0 refined below */

/* Term 3: Hi16(Term 2) + A * H + B * G + C * F + D * E + carry ==> Pos0 */
	lhui	r11, r1, RES_HI		/* Hi16(Term 2) */
	mul	r9, r20, r27		/* A * H */
	mul	r10, r21, r26		/* B * G */
	mul	r7, r22, r25		/* C * F */
	mul	r8, r23, r24		/* D * E */
	add	r9, r9, r11
	add	r9, r9, r10
	add	r9, r9, r7
	add	r9, r9, r8
	add	r9, r9, r12		/* carry saved from Term 1 */
	shi	r9, r1, RES_HI		/* only the low 16 bits land in Pos0 */

/* Move results to r3 and r4 */
	lwi	r3, r1, RES_HI		/* Hi Part */
	lwi	r4, r1, RES_LO		/* Lo Part */

/* Restore the saved registers */
	lwi	r20, r1, 0
	lwi	r21, r1, 4
	lwi	r22, r1, 8
	lwi	r23, r1, 12
	lwi	r24, r1, 16
	lwi	r25, r1, 20
	lwi	r26, r1, 24
	lwi	r27, r1, 28

/* Return; the frame is released in the delay slot of rtsd */
	rtsd	r15, 8
	addi	r1, r1, FRAME_SZ

.size	__muldi3, . - __muldi3
.end	__muldi3