summaryrefslogtreecommitdiffstats
path: root/crypto/openssl/crypto/bn/asm/x86_64-gcc.c
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/openssl/crypto/bn/asm/x86_64-gcc.c')
-rw-r--r--crypto/openssl/crypto/bn/asm/x86_64-gcc.c110
1 files changed, 56 insertions, 54 deletions
diff --git a/crypto/openssl/crypto/bn/asm/x86_64-gcc.c b/crypto/openssl/crypto/bn/asm/x86_64-gcc.c
index 9c5074b..d548886 100644
--- a/crypto/openssl/crypto/bn/asm/x86_64-gcc.c
+++ b/crypto/openssl/crypto/bn/asm/x86_64-gcc.c
@@ -55,7 +55,7 @@
* machine.
*/
-# ifdef _WIN64
+# if defined(_WIN64) || !defined(__LP64__)
# define BN_ULONG unsigned long long
# else
# define BN_ULONG unsigned long
@@ -63,7 +63,6 @@
# undef mul
# undef mul_add
-# undef sqr
/*-
* "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
@@ -99,8 +98,8 @@
: "cc"); \
(r)=carry, carry=high; \
} while (0)
-
-# define sqr(r0,r1,a) \
+# undef sqr
+# define sqr(r0,r1,a) \
asm ("mulq %2" \
: "=a"(r0),"=d"(r1) \
: "a"(a) \
@@ -204,20 +203,22 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
int n)
{
- BN_ULONG ret = 0, i = 0;
+ BN_ULONG ret;
+ size_t i = 0;
if (n <= 0)
return 0;
- asm volatile (" subq %2,%2 \n"
+ asm volatile (" subq %0,%0 \n" /* clear carry */
+ " jmp 1f \n"
".p2align 4 \n"
"1: movq (%4,%2,8),%0 \n"
" adcq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n"
- " leaq 1(%2),%2 \n"
+ " lea 1(%2),%2 \n"
" loop 1b \n"
- " sbbq %0,%0 \n":"=&a" (ret), "+c"(n),
- "=&r"(i)
+ " sbbq %0,%0 \n":"=&r" (ret), "+c"(n),
+ "+r"(i)
:"r"(rp), "r"(ap), "r"(bp)
:"cc", "memory");
@@ -228,20 +229,22 @@ BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
int n)
{
- BN_ULONG ret = 0, i = 0;
+ BN_ULONG ret;
+ size_t i = 0;
if (n <= 0)
return 0;
- asm volatile (" subq %2,%2 \n"
+ asm volatile (" subq %0,%0 \n" /* clear borrow */
+ " jmp 1f \n"
".p2align 4 \n"
"1: movq (%4,%2,8),%0 \n"
" sbbq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n"
- " leaq 1(%2),%2 \n"
+ " lea 1(%2),%2 \n"
" loop 1b \n"
- " sbbq %0,%0 \n":"=&a" (ret), "+c"(n),
- "=&r"(i)
+ " sbbq %0,%0 \n":"=&r" (ret), "+c"(n),
+ "+r"(i)
:"r"(rp), "r"(ap), "r"(bp)
:"cc", "memory");
@@ -313,55 +316,58 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
*/
# if 0
/* original macros are kept for reference purposes */
-# define mul_add_c(a,b,c0,c1,c2) { \
- BN_ULONG ta=(a),tb=(b); \
- t1 = ta * tb; \
- t2 = BN_UMULT_HIGH(ta,tb); \
- c0 += t1; t2 += (c0<t1)?1:0; \
- c1 += t2; c2 += (c1<t2)?1:0; \
- }
-
-# define mul_add_c2(a,b,c0,c1,c2) { \
- BN_ULONG ta=(a),tb=(b),t0; \
- t1 = BN_UMULT_HIGH(ta,tb); \
- t0 = ta * tb; \
- c0 += t0; t2 = t1+((c0<t0)?1:0);\
- c1 += t2; c2 += (c1<t2)?1:0; \
- c0 += t0; t1 += (c0<t0)?1:0; \
- c1 += t1; c2 += (c1<t1)?1:0; \
- }
+# define mul_add_c(a,b,c0,c1,c2) do { \
+ BN_ULONG ta = (a), tb = (b); \
+ BN_ULONG lo, hi; \
+ BN_UMULT_LOHI(lo,hi,ta,tb); \
+ c0 += lo; hi += (c0<lo)?1:0; \
+ c1 += hi; c2 += (c1<hi)?1:0; \
+ } while(0)
+
+# define mul_add_c2(a,b,c0,c1,c2) do { \
+ BN_ULONG ta = (a), tb = (b); \
+ BN_ULONG lo, hi, tt; \
+ BN_UMULT_LOHI(lo,hi,ta,tb); \
+ c0 += lo; tt = hi+((c0<lo)?1:0); \
+ c1 += tt; c2 += (c1<tt)?1:0; \
+ c0 += lo; hi += (c0<lo)?1:0; \
+ c1 += hi; c2 += (c1<hi)?1:0; \
+ } while(0)
+
+# define sqr_add_c(a,i,c0,c1,c2) do { \
+ BN_ULONG ta = (a)[i]; \
+ BN_ULONG lo, hi; \
+ BN_UMULT_LOHI(lo,hi,ta,ta); \
+ c0 += lo; hi += (c0<lo)?1:0; \
+ c1 += hi; c2 += (c1<hi)?1:0; \
+ } while(0)
# else
-# define mul_add_c(a,b,c0,c1,c2) do { \
+# define mul_add_c(a,b,c0,c1,c2) do { \
+ BN_ULONG t1,t2; \
asm ("mulq %3" \
: "=a"(t1),"=d"(t2) \
: "a"(a),"m"(b) \
: "cc"); \
- asm ("addq %2,%0; adcq %3,%1" \
- : "+r"(c0),"+d"(t2) \
- : "a"(t1),"g"(0) \
- : "cc"); \
- asm ("addq %2,%0; adcq %3,%1" \
- : "+r"(c1),"+r"(c2) \
- : "d"(t2),"g"(0) \
- : "cc"); \
+ asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
+ : "+r"(c0),"+r"(c1),"+r"(c2) \
+ : "r"(t1),"r"(t2),"g"(0) \
+ : "cc"); \
} while (0)
-# define sqr_add_c(a,i,c0,c1,c2) do { \
+# define sqr_add_c(a,i,c0,c1,c2) do { \
+ BN_ULONG t1,t2; \
asm ("mulq %2" \
: "=a"(t1),"=d"(t2) \
: "a"(a[i]) \
: "cc"); \
- asm ("addq %2,%0; adcq %3,%1" \
- : "+r"(c0),"+d"(t2) \
- : "a"(t1),"g"(0) \
- : "cc"); \
- asm ("addq %2,%0; adcq %3,%1" \
- : "+r"(c1),"+r"(c2) \
- : "d"(t2),"g"(0) \
- : "cc"); \
+ asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
+ : "+r"(c0),"+r"(c1),"+r"(c2) \
+ : "r"(t1),"r"(t2),"g"(0) \
+ : "cc"); \
} while (0)
-# define mul_add_c2(a,b,c0,c1,c2) do { \
+# define mul_add_c2(a,b,c0,c1,c2) do { \
+ BN_ULONG t1,t2; \
asm ("mulq %3" \
: "=a"(t1),"=d"(t2) \
: "a"(a),"m"(b) \
@@ -382,7 +388,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
- BN_ULONG t1, t2;
BN_ULONG c1, c2, c3;
c1 = 0;
@@ -486,7 +491,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
- BN_ULONG t1, t2;
BN_ULONG c1, c2, c3;
c1 = 0;
@@ -526,7 +530,6 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
{
- BN_ULONG t1, t2;
BN_ULONG c1, c2, c3;
c1 = 0;
@@ -602,7 +605,6 @@ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
{
- BN_ULONG t1, t2;
BN_ULONG c1, c2, c3;
c1 = 0;
OpenPOWER on IntegriCloud