1 files changed, 133 insertions, 117 deletions
diff --git a/crypto/bn/bn_gf2m.c b/crypto/bn/bn_gf2m.c
index 5d90f1e..8a4dc20 100644
--- a/crypto/bn/bn_gf2m.c
+++ b/crypto/bn/bn_gf2m.c
@@ -94,6 +94,8 @@
 #include "cryptlib.h"
 #include "bn_lcl.h"
 
+#ifndef OPENSSL_NO_EC2M
+
 /* Maximum number of iterations before BN_GF2m_mod_solve_quad_arr should fail. */
 #define MAX_ITERATIONS 50
 
@@ -121,74 +123,13 @@ static const BN_ULONG SQR_tb[16] =
     SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >>  8 & 0xF] << 16 | \
     SQR_tb[(w) >>  4 & 0xF] <<  8 | SQR_tb[(w)       & 0xF]
 #endif
-#ifdef SIXTEEN_BIT
-#define SQR1(w) \
-    SQR_tb[(w) >> 12 & 0xF] <<  8 | SQR_tb[(w) >>  8 & 0xF]
-#define SQR0(w) \
-    SQR_tb[(w) >>  4 & 0xF] <<  8 | SQR_tb[(w)       & 0xF]
-#endif
-#ifdef EIGHT_BIT
-#define SQR1(w) \
-    SQR_tb[(w) >>  4 & 0xF]
-#define SQR0(w) \
-    SQR_tb[(w)       & 15]
-#endif
 
+#if !defined(OPENSSL_BN_ASM_GF2m)
 /* Product of two polynomials a, b each with degree < BN_BITS2 - 1,
  * result is a polynomial r with degree < 2 * BN_BITS - 1
  * The caller MUST ensure that the variables have the right amount
  * of space allocated.
  */
-#ifdef EIGHT_BIT
-static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
-	{
-	register BN_ULONG h, l, s;
-	BN_ULONG tab[4], top1b = a >> 7;
-	register BN_ULONG a1, a2;
-
-	a1 = a & (0x7F); a2 = a1 << 1;
-
-	tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2;
-
-	s = tab[b      & 0x3]; l  = s;
-	s = tab[b >> 2 & 0x3]; l ^= s << 2; h  = s >> 6;
-	s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 4;
-	s = tab[b >> 6      ]; l ^= s << 6; h ^= s >> 2;
-	
-	/* compensate for the top bit of a */
-
-	if (top1b & 01) { l ^= b << 7; h ^= b >> 1; } 
-
-	*r1 = h; *r0 = l;
-	} 
-#endif
-#ifdef SIXTEEN_BIT
-static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
-	{
-	register BN_ULONG h, l, s;
-	BN_ULONG tab[4], top1b = a >> 15; 
-	register BN_ULONG a1, a2;
-
-	a1 = a & (0x7FFF); a2 = a1 << 1;
-
-	tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2;
-
-	s = tab[b      & 0x3]; l  = s;
-	s = tab[b >> 2 & 0x3]; l ^= s <<  2; h  = s >> 14;
-	s = tab[b >> 4 & 0x3]; l ^= s <<  4; h ^= s >> 12;
-	s = tab[b >> 6 & 0x3]; l ^= s <<  6; h ^= s >> 10;
-	s = tab[b >> 8 & 0x3]; l ^= s <<  8; h ^= s >>  8;
-	s = tab[b >>10 & 0x3]; l ^= s << 10; h ^= s >>  6;
-	s = tab[b >>12 & 0x3]; l ^= s << 12; h ^= s >>  4;
-	s = tab[b >>14      ]; l ^= s << 14; h ^= s >>  2;
-
-	/* compensate for the top bit of a */
-
-	if (top1b & 01) { l ^= b << 15; h ^= b >> 1; } 
-
-	*r1 = h; *r0 = l;
-	} 
-#endif
 #ifdef THIRTY_TWO_BIT
 static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
 	{
@@ -278,7 +219,9 @@ static void bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0, c
 	r[2] ^= m1 ^ r[1] ^ r[3];  /* h0 ^= m1 ^ l1 ^ h1; */
 	r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0;  /* l1 ^= l0 ^ h0 ^ m0; */
 	}
-
+#else
+void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0);
+#endif 
 
 /* Add polynomials a and b and store result in r; r could be a or b, a and b 
  * could be equal; r is the bitwise XOR of a and b.
@@ -321,7 +264,7 @@ int	BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
 
 
 /* Performs modular reduction of a and store result in r.  r could be a. */
-int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[])
+int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[])
 	{
 	int j, k;
 	int n, dN, d0, d1;
@@ -422,21 +365,17 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[])
 int	BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p)
 	{
 	int ret = 0;
-	const int max = BN_num_bits(p);
-	unsigned int *arr=NULL;
+	int arr[6];
 	bn_check_top(a);
 	bn_check_top(p);
-	if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
-	ret = BN_GF2m_poly2arr(p, arr, max);
-	if (!ret || ret > max)
+	ret = BN_GF2m_poly2arr(p, arr, sizeof(arr)/sizeof(arr[0]));
+	if (!ret || ret > (int)(sizeof(arr)/sizeof(arr[0])))
 		{
 		BNerr(BN_F_BN_GF2M_MOD,BN_R_INVALID_LENGTH);
-		goto err;
+		return 0;
 		}
 	ret = BN_GF2m_mod_arr(r, a, arr);
 	bn_check_top(r);
-err:
-	if (arr) OPENSSL_free(arr);
 	return ret;
 	}
 
@@ -444,7 +383,7 @@ err:
 /* Compute the product of two polynomials a and b, reduce modulo p, and store
  * the result in r.  r could be a or b; a could be b.
  */
-int	BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx)
+int	BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx)
 	{
 	int zlen, i, j, k, ret = 0;
 	BIGNUM *s;
@@ -500,12 +439,12 @@ err:
 int	BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx)
 	{
 	int ret = 0;
-	const int max = BN_num_bits(p);
-	unsigned int *arr=NULL;
+	const int max = BN_num_bits(p) + 1;
+	int *arr=NULL;
 	bn_check_top(a);
 	bn_check_top(b);
 	bn_check_top(p);
-	if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
+	if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
 	ret = BN_GF2m_poly2arr(p, arr, max);
 	if (!ret || ret > max)
 		{
@@ -521,7 +460,7 @@ err:
 
 
 /* Square a, reduce the result mod p, and store it in a.  r could be a. */
-int	BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx)
+int	BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx)
 	{
 	int i, ret = 0;
 	BIGNUM *s;
@@ -556,12 +495,12 @@ err:
 int	BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
 	{
 	int ret = 0;
-	const int max = BN_num_bits(p);
-	unsigned int *arr=NULL;
+	const int max = BN_num_bits(p) + 1;
+	int *arr=NULL;
 
 	bn_check_top(a);
 	bn_check_top(p);
-	if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
+	if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
 	ret = BN_GF2m_poly2arr(p, arr, max);
 	if (!ret || ret > max)
 		{
@@ -583,7 +522,7 @@ err:
  */
 int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
 	{
-	BIGNUM *b, *c, *u, *v, *tmp;
+	BIGNUM *b, *c = NULL, *u = NULL, *v = NULL, *tmp;
 	int ret = 0;
 
 	bn_check_top(a);
@@ -591,18 +530,18 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
 
 	BN_CTX_start(ctx);
 	
-	b = BN_CTX_get(ctx);
-	c = BN_CTX_get(ctx);
-	u = BN_CTX_get(ctx);
-	v = BN_CTX_get(ctx);
-	if (v == NULL) goto err;
+	if ((b = BN_CTX_get(ctx))==NULL) goto err;
+	if ((c = BN_CTX_get(ctx))==NULL) goto err;
+	if ((u = BN_CTX_get(ctx))==NULL) goto err;
+	if ((v = BN_CTX_get(ctx))==NULL) goto err;
 
-	if (!BN_one(b)) goto err;
 	if (!BN_GF2m_mod(u, a, p)) goto err;
-	if (!BN_copy(v, p)) goto err;
-
 	if (BN_is_zero(u)) goto err;
 
+	if (!BN_copy(v, p)) goto err;
+#if 0
+	if (!BN_one(b)) goto err;
+
 	while (1)
 		{
 		while (!BN_is_odd(u))
@@ -627,13 +566,89 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
 		if (!BN_GF2m_add(u, u, v)) goto err;
 		if (!BN_GF2m_add(b, b, c)) goto err;
 		}
+#else
+	{
+	int i,	ubits = BN_num_bits(u),
+		vbits = BN_num_bits(v),	/* v is copy of p */
+		top = p->top;
+	BN_ULONG *udp,*bdp,*vdp,*cdp;
+
+	bn_wexpand(u,top);	udp = u->d;
+				for (i=u->top;i<top;i++) udp[i] = 0;
+				u->top = top;
+	bn_wexpand(b,top);	bdp = b->d;
+				bdp[0] = 1;
+				for (i=1;i<top;i++) bdp[i] = 0;
+				b->top = top;
+	bn_wexpand(c,top);	cdp = c->d;
+				for (i=0;i<top;i++) cdp[i] = 0;
+				c->top = top;
+	vdp = v->d;	/* It pays off to "cache" *->d pointers, because
+			 * it allows optimizer to be more aggressive.
+			 * But we don't have to "cache" p->d, because *p
+			 * is declared 'const'... */
+	while (1)
+		{
+		while (ubits && !(udp[0]&1))
+			{
+			BN_ULONG u0,u1,b0,b1,mask;
+
+			u0   = udp[0];
+			b0   = bdp[0];
+			mask = (BN_ULONG)0-(b0&1);
+			b0  ^= p->d[0]&mask;
+			for (i=0;i<top-1;i++)
+				{
+				u1 = udp[i+1];
+				udp[i] = ((u0>>1)|(u1<<(BN_BITS2-1)))&BN_MASK2;
+				u0 = u1;
+				b1 = bdp[i+1]^(p->d[i+1]&mask);
+				bdp[i] = ((b0>>1)|(b1<<(BN_BITS2-1)))&BN_MASK2;
+				b0 = b1;
+				}
+			udp[i] = u0>>1;
+			bdp[i] = b0>>1;
+			ubits--;
+			}
+
+		if (ubits<=BN_BITS2 && udp[0]==1) break;
 
+		if (ubits<vbits)
+			{
+			i = ubits; ubits = vbits; vbits = i;
+			tmp = u; u = v; v = tmp;
+			tmp = b; b = c; c = tmp;
+			udp = vdp; vdp = v->d;
+			bdp = cdp; cdp = c->d;
+			}
+		for(i=0;i<top;i++)
+			{
+			udp[i] ^= vdp[i];
+			bdp[i] ^= cdp[i];
+			}
+		if (ubits==vbits)
+			{
+			BN_ULONG ul;
+			int utop = (ubits-1)/BN_BITS2;
+
+			while ((ul=udp[utop])==0 && utop) utop--;
+			ubits = utop*BN_BITS2 + BN_num_bits_word(ul);
+			}
+		}
+	bn_correct_top(b);
+	}
+#endif
 
 	if (!BN_copy(r, b)) goto err;
 	bn_check_top(r);
 	ret = 1;
 
 err:
+#ifdef BN_DEBUG /* BN_CTX_end would complain about the expanded form */
+        bn_correct_top(c);
+        bn_correct_top(u);
+        bn_correct_top(v);
+#endif
   	BN_CTX_end(ctx);
 	return ret;
 	}
@@ -644,7 +659,7 @@ err:
  * function is only provided for convenience; for best performance, use the 
  * BN_GF2m_mod_inv function.
  */
-int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[], BN_CTX *ctx)
 	{
 	BIGNUM *field;
 	int ret = 0;
@@ -770,7 +785,7 @@ err:
  * function is only provided for convenience; for best performance, use the 
  * BN_GF2m_mod_div function.
  */
-int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const int p[], BN_CTX *ctx)
 	{
 	BIGNUM *field;
 	int ret = 0;
@@ -795,7 +810,7 @@ err:
  * the result in r.  r could be a.
  * Uses simple square-and-multiply algorithm A.5.1 from IEEE P1363.
  */
-int	BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx)
+int	BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx)
 	{
 	int ret = 0, i, n;
 	BIGNUM *u;
@@ -841,12 +856,12 @@ err:
 int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx)
 	{
 	int ret = 0;
-	const int max = BN_num_bits(p);
-	unsigned int *arr=NULL;
+	const int max = BN_num_bits(p) + 1;
+	int *arr=NULL;
 	bn_check_top(a);
 	bn_check_top(b);
 	bn_check_top(p);
-	if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
+	if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
 	ret = BN_GF2m_poly2arr(p, arr, max);
 	if (!ret || ret > max)
 		{
@@ -864,7 +879,7 @@ err:
  * the result in r.  r could be a.
  * Uses exponentiation as in algorithm A.4.1 from IEEE P1363.
  */
-int	BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx)
+int	BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx)
 	{
 	int ret = 0;
 	BIGNUM *u;
@@ -900,11 +915,11 @@ err:
 int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
 	{
 	int ret = 0;
-	const int max = BN_num_bits(p);
-	unsigned int *arr=NULL;
+	const int max = BN_num_bits(p) + 1;
+	int *arr=NULL;
 	bn_check_top(a);
 	bn_check_top(p);
-	if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
+	if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
 	ret = BN_GF2m_poly2arr(p, arr, max);
 	if (!ret || ret > max)
 		{
@@ -921,10 +936,9 @@ err:
 /* Find r such that r^2 + r = a mod p.  r could be a. If no r exists returns 0.
  * Uses algorithms A.4.7 and A.4.6 from IEEE P1363.
  */
-int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[], BN_CTX *ctx)
 	{
-	int ret = 0, count = 0;
-	unsigned int j;
+	int ret = 0, count = 0, j;
 	BIGNUM *a, *z, *rho, *w, *w2, *tmp;
 
 	bn_check_top(a_);
@@ -1019,11 +1033,11 @@ err:
 int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
 	{
 	int ret = 0;
-	const int max = BN_num_bits(p);
-	unsigned int *arr=NULL;
+	const int max = BN_num_bits(p) + 1;
+	int *arr=NULL;
 	bn_check_top(a);
 	bn_check_top(p);
-	if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) *
+	if ((arr = (int *)OPENSSL_malloc(sizeof(int) *
 						max)) == NULL) goto err;
 	ret = BN_GF2m_poly2arr(p, arr, max);
 	if (!ret || ret > max)
@@ -1039,20 +1053,17 @@ err:
 	}
 
 /* Convert the bit-string representation of a polynomial
- * ( \sum_{i=0}^n a_i * x^i , where a_0 is *not* zero) into an array
- * of integers corresponding to the bits with non-zero coefficient.
+ * ( \sum_{i=0}^n a_i * x^i) into an array of integers corresponding 
+ * to the bits with non-zero coefficient.  Array is terminated with -1.
  * Up to max elements of the array will be filled.  Return value is total
- * number of coefficients that would be extracted if array was large enough.
+ * number of array elements that would be filled if array was large enough.
  */
-int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max)
+int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max)
 	{
 	int i, j, k = 0;
 	BN_ULONG mask;
 
-	if (BN_is_zero(a) || !BN_is_bit_set(a, 0))
-		/* a_0 == 0 => return error (the unsigned int array
-		 * must be terminated by 0)
-		 */
+	if (BN_is_zero(a))
 		return 0;
 
 	for (i = a->top - 1; i >= 0; i--)
@@ -1072,26 +1083,31 @@ int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max)
 			}
 		}
 
+	if (k < max) {
+		p[k] = -1;
+		k++;
+	}
+
 	return k;
 	}
 
 /* Convert the coefficient array representation of a polynomial to a 
- * bit-string.  The array must be terminated by 0.
+ * bit-string.  The array must be terminated by -1.
  */
-int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a)
+int BN_GF2m_arr2poly(const int p[], BIGNUM *a)
 	{
 	int i;
 
 	bn_check_top(a);
 	BN_zero(a);
-	for (i = 0; p[i] != 0; i++)
+	for (i = 0; p[i] != -1; i++)
 		{
 		if (BN_set_bit(a, p[i]) == 0)
 			return 0;
 		}
-	BN_set_bit(a, 0);
 	bn_check_top(a);
 
 	return 1;
 	}
 
+#endif