| author | markm <markm@FreeBSD.org> | 1996-10-20 08:49:26 +0000 |
|---|---|---|
| committer | markm <markm@FreeBSD.org> | 1996-10-20 08:49:26 +0000 |
| commit | 6ec01646dc55b6fa688ed5906e0d52556d174da2 (patch) | |
| tree | ecc4c214d76efa8e1b2fb33ac2f0aab18ea03ebf /contrib/libgmp/mpn/generic | |
Clean import of libgmp 2.0.2, with only the non-x86 bits removed.
BMakefiles and other bits will follow.
Requested by: Andrey Chernov
Made world by: Chuck Robey
Diffstat (limited to 'contrib/libgmp/mpn/generic')
32 files changed, 4487 insertions, 0 deletions
diff --git a/contrib/libgmp/mpn/generic/add_n.c b/contrib/libgmp/mpn/generic/add_n.c new file mode 100644 index 0000000..9d71df1 --- /dev/null +++ b/contrib/libgmp/mpn/generic/add_n.c @@ -0,0 +1,62 @@ +/* mpn_add_n -- Add two limb vectors of equal, non-zero length. + +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +mp_limb_t +#if __STDC__ +mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +#else +mpn_add_n (res_ptr, s1_ptr, s2_ptr, size) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_srcptr s2_ptr; + mp_size_t size; +#endif +{ + register mp_limb_t x, y, cy; + register mp_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to one addend */ + cy = (y < cy); /* get out carry from that addition */ + y = x + y; /* add other addend */ + cy = (y < x) + cy; /* get out carry from that add, combine */ + res_ptr[j] = y; + } + while (++j != 0); + + return cy; +} diff --git a/contrib/libgmp/mpn/generic/addmul_1.c b/contrib/libgmp/mpn/generic/addmul_1.c new file mode 100644 index 0000000..3a5e214 --- /dev/null +++ b/contrib/libgmp/mpn/generic/addmul_1.c @@ -0,0 +1,65 @@ +/* mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR + by S2_LIMB, add the S1_SIZE least significant limbs of the product to the + limb vector pointed to by RES_PTR. Return the most significant limb of + the product, adjusted for carry-out from the addition. + +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
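The carry handling in the mpn_add_n loop above is branch-free: after `y += cy`, a wrap can only have occurred if the sum is now smaller than the carry that was added, and likewise for `x + y`. A minimal standalone sketch of the same trick, using uint64_t in place of mp_limb_t (types and names here are illustrative, not GMP's):

```c
#include <stdint.h>

/* r[i] = a[i] + b[i] with carry propagation; returns the carry out.
   Same comparison trick as the mpn_add_n loop above, on uint64_t limbs. */
static uint64_t add_n (uint64_t *r, const uint64_t *a, const uint64_t *b, int n)
{
  uint64_t cy = 0;
  for (int i = 0; i < n; i++)
    {
      uint64_t y = b[i] + cy;
      cy = (y < cy);          /* carry out of adding the previous carry */
      y = a[i] + y;
      cy += (y < a[i]);       /* carry out of adding the other addend */
      r[i] = y;
    }
  return cy;
}
```

Adding {2^64-1, 1} and {1, 0} with this yields {0, 2} and no carry out, exercising the low-limb wrap.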
*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +mpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + mp_size_t s1_size; + register mp_limb_t s2_limb; +{ + register mp_limb_t cy_limb; + register mp_size_t j; + register mp_limb_t prod_high, prod_low; + register mp_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb) + prod_high; + + x = res_ptr[j]; + prod_low = x + prod_low; + cy_limb += (prod_low < x); + res_ptr[j] = prod_low; + } + while (++j != 0); + + return cy_limb; +} diff --git a/contrib/libgmp/mpn/generic/bdivmod.c b/contrib/libgmp/mpn/generic/bdivmod.c new file mode 100644 index 0000000..f095288 --- /dev/null +++ b/contrib/libgmp/mpn/generic/bdivmod.c @@ -0,0 +1,129 @@ +/* mpn/bdivmod.c: mpn_bdivmod for computing U/V mod 2^d. + +Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* q_high = mpn_bdivmod (qp, up, usize, vp, vsize, d). + + Puts the low d/BITS_PER_MP_LIMB limbs of Q = U / V mod 2^d at qp, and + returns the high d%BITS_PER_MP_LIMB bits of Q as the result. + + Also, U - Q * V mod 2^(usize*BITS_PER_MP_LIMB) is placed at up. Since the + low d/BITS_PER_MP_LIMB limbs of this difference are zero, the code allows + the limb vectors at qp to overwrite the low limbs at up, provided qp <= up. + + Preconditions: + 1. V is odd. + 2. usize * BITS_PER_MP_LIMB >= d. + 3. If Q and U overlap, qp <= up. + + Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu) + + Funding for this work has been partially provided by Conselho Nacional + de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant + 301314194-2, and was done while I was a visiting reseacher in the Instituto + de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS). + + References: + T. Jebelean, An algorithm for exact division, Journal of Symbolic + Computation, v. 15, 1993, pp. 169-180. + + K. Weber, The accelerated integer GCD algorithm, ACM Transactions on + Mathematical Software, v. 21 (March), 1995, pp. 111-122. 
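The umul_ppmm used above is a longlong.h primitive producing the two-limb product. A sketch of the same accumulate step with that primitive emulated through unsigned __int128, which is a GCC/Clang extension assumed only by this sketch, not by the file:

```c
#include <stdint.h>

/* r[0..n-1] += a[0..n-1] * b; returns the high limb carried out.
   Mirrors the mpn_addmul_1 loop, with umul_ppmm emulated by __int128. */
static uint64_t addmul_1 (uint64_t *r, const uint64_t *a, int n, uint64_t b)
{
  uint64_t cy = 0;
  for (int i = 0; i < n; i++)
    {
      unsigned __int128 p = (unsigned __int128) a[i] * b;
      uint64_t lo = (uint64_t) p;
      uint64_t hi = (uint64_t) (p >> 64);
      lo += cy;
      cy = (lo < cy) + hi;    /* carry from the carry add, plus high limb */
      uint64_t x = r[i];
      lo += x;
      cy += (lo < x);         /* carry from accumulating into the result */
      r[i] = lo;
    }
  return cy;
}
```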
*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +#if __STDC__ +mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize, + mp_srcptr vp, mp_size_t vsize, unsigned long int d) +#else +mpn_bdivmod (qp, up, usize, vp, vsize, d) + mp_ptr qp; + mp_ptr up; + mp_size_t usize; + mp_srcptr vp; + mp_size_t vsize; + unsigned long int d; +#endif +{ + /* Cache for v_inv is used to make mpn_accelgcd faster. */ + static mp_limb_t previous_low_vlimb = 0; + static mp_limb_t v_inv; /* 1/V mod 2^BITS_PER_MP_LIMB. */ + + if (vp[0] != previous_low_vlimb) /* Cache miss; compute v_inv. */ + { + mp_limb_t v = previous_low_vlimb = vp[0]; + mp_limb_t make_zero = 1; + mp_limb_t two_i = 1; + v_inv = 0; + do + { + while ((two_i & make_zero) == 0) + two_i <<= 1, v <<= 1; + v_inv += two_i; + make_zero -= v; + } + while (make_zero); + } + + /* Need faster computation for some common cases in mpn_accelgcd. */ + if (usize == 2 && vsize == 2 && + (d == BITS_PER_MP_LIMB || d == 2*BITS_PER_MP_LIMB)) + { + mp_limb_t hi, lo; + mp_limb_t q = up[0] * v_inv; + umul_ppmm (hi, lo, q, vp[0]); + up[0] = 0, up[1] -= hi + q*vp[1], qp[0] = q; + if (d == 2*BITS_PER_MP_LIMB) + q = up[1] * v_inv, up[1] = 0, qp[1] = q; + return 0; + } + + /* Main loop. */ + while (d >= BITS_PER_MP_LIMB) + { + mp_limb_t q = up[0] * v_inv; + mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q); + if (usize > vsize) + mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b); + d -= BITS_PER_MP_LIMB; + up += 1, usize -= 1; + *qp++ = q; + } + + if (d) + { + mp_limb_t b; + mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1); + switch (q) + { + case 0: return 0; + case 1: b = mpn_sub_n (up, up, vp, MIN (usize, vsize)); break; + default: b = mpn_submul_1 (up, vp, MIN (usize, vsize), q); break; + } + if (usize > vsize) + mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b); + return q; + } + + return 0; +} diff --git a/contrib/libgmp/mpn/generic/cmp.c b/contrib/libgmp/mpn/generic/cmp.c new file mode 100644 index 0000000..4e9c60d --- /dev/null +++ b/contrib/libgmp/mpn/generic/cmp.c @@ -0,0 +1,56 @@ +/* mpn_cmp -- Compare two low-level natural-number integers. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE. + There are no restrictions on the relative sizes of + the two arguments. + Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2. 
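The cached v_inv above is the inverse of V's odd low limb modulo 2^BITS_PER_MP_LIMB, built bit by bit. A common alternative construction, sketched here on uint64_t rather than mp_limb_t, is the Newton/Hensel iteration, which doubles the number of correct low bits per step:

```c
#include <stdint.h>
#include <assert.h>

/* Inverse of an odd v modulo 2^64.  For odd v, v*v == 1 (mod 8), so
   x = v is already correct to 3 low bits; each iteration x *= 2 - v*x
   doubles that, so 5 steps give 3*2^5 = 96 >= 64 correct bits. */
static uint64_t inv_mod_2pow64 (uint64_t v)
{
  assert (v & 1);
  uint64_t x = v;
  for (int i = 0; i < 5; i++)
    x *= 2 - v * x;
  return x;                   /* v * x == 1 (mod 2^64) */
}
```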
*/ + +int +#if __STDC__ +mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size) +#else +mpn_cmp (op1_ptr, op2_ptr, size) + mp_srcptr op1_ptr; + mp_srcptr op2_ptr; + mp_size_t size; +#endif +{ + mp_size_t i; + mp_limb_t op1_word, op2_word; + + for (i = size - 1; i >= 0; i--) + { + op1_word = op1_ptr[i]; + op2_word = op2_ptr[i]; + if (op1_word != op2_word) + goto diff; + } + return 0; + diff: + /* This can *not* be simplified to + op2_word - op2_word + since that expression might give signed overflow. */ + return (op1_word > op2_word) ? 1 : -1; +} diff --git a/contrib/libgmp/mpn/generic/divmod_1.c b/contrib/libgmp/mpn/generic/divmod_1.c new file mode 100644 index 0000000..f93841f --- /dev/null +++ b/contrib/libgmp/mpn/generic/divmod_1.c @@ -0,0 +1,208 @@ +/* mpn_divmod_1(quot_ptr, dividend_ptr, dividend_size, divisor_limb) -- + Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. + Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR. + Return the single-limb remainder. + There are no constraints on the value of the divisor. + + QUOT_PTR and DIVIDEND_PTR might point to the same limb. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef UMUL_TIME +#define UMUL_TIME 1 +#endif + +#ifndef UDIV_TIME +#define UDIV_TIME UMUL_TIME +#endif + +/* FIXME: We should be using invert_limb (or invert_normalized_limb) + here (not udiv_qrnnd). */ + +mp_limb_t +#if __STDC__ +mpn_divmod_1 (mp_ptr quot_ptr, + mp_srcptr dividend_ptr, mp_size_t dividend_size, + mp_limb_t divisor_limb) +#else +mpn_divmod_1 (quot_ptr, dividend_ptr, dividend_size, divisor_limb) + mp_ptr quot_ptr; + mp_srcptr dividend_ptr; + mp_size_t dividend_size; + mp_limb_t divisor_limb; +#endif +{ + mp_size_t i; + mp_limb_t n1, n0, r; + int dummy; + + /* ??? Should this be handled at all? Rely on callers? */ + if (dividend_size == 0) + return 0; + + /* If multiplication is much faster than division, and the + dividend is large, pre-invert the divisor, and use + only multiplications in the inner loop. */ + + /* This test should be read: + Does it ever help to use udiv_qrnnd_preinv? + && Does what we save compensate for the inversion overhead? */ + if (UDIV_TIME > (2 * UMUL_TIME + 6) + && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) + { + int normalization_steps; + + count_leading_zeros (normalization_steps, divisor_limb); + if (normalization_steps != 0) + { + mp_limb_t divisor_limb_inverted; + + divisor_limb <<= normalization_steps; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + most significant bit (with weight 2**N) implicit. 
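The warning in the mpn_cmp comment above, that the difference `op1_word - op2_word` must not be returned directly, can be seen concretely. The snippet below assumes the usual two's-complement narrowing when an out-of-range 64-bit value is cast to int:

```c
#include <stdint.h>
#include <stdio.h>

int main (void)
{
  uint64_t a = 1, b = 1ULL << 32;                  /* a < b */
  printf ("%d\n", (int) (a - b));                  /* prints 1: wrong sign */
  printf ("%d\n", (a > b) ? 1 : (a < b) ? -1 : 0); /* prints -1: correct */
  return 0;
}
```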
*/ + + /* Special case for DIVISOR_LIMB == 100...000. */ + if (divisor_limb << 1 == 0) + divisor_limb_inverted = ~(mp_limb_t) 0; + else + udiv_qrnnd (divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MP_LIMB - normalization_steps); + + /* Possible optimization: + if (r == 0 + && divisor_limb > ((n1 << normalization_steps) + | (dividend_ptr[dividend_size - 2] >> ...))) + ...one division less... */ + + for (i = dividend_size - 2; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd_preinv (quot_ptr[i + 1], r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))), + divisor_limb, divisor_limb_inverted); + n1 = n0; + } + udiv_qrnnd_preinv (quot_ptr[0], r, r, + n1 << normalization_steps, + divisor_limb, divisor_limb_inverted); + return r >> normalization_steps; + } + else + { + mp_limb_t divisor_limb_inverted; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + most significant bit (with weight 2**N) implicit. */ + + /* Special case for DIVISOR_LIMB == 100...000. */ + if (divisor_limb << 1 == 0) + divisor_limb_inverted = ~(mp_limb_t) 0; + else + udiv_qrnnd (divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + { + quot_ptr[i] = 0; + i--; + } + + for (; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd_preinv (quot_ptr[i], r, r, + n0, divisor_limb, divisor_limb_inverted); + } + return r; + } + } + else + { + if (UDIV_NEEDS_NORMALIZATION) + { + int normalization_steps; + + count_leading_zeros (normalization_steps, divisor_limb); + if (normalization_steps != 0) + { + divisor_limb <<= normalization_steps; + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MP_LIMB - normalization_steps); + + /* Possible optimization: + if (r == 0 + && divisor_limb > ((n1 << normalization_steps) + | (dividend_ptr[dividend_size - 2] >> ...))) + ...one division less... */ + + for (i = dividend_size - 2; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd (quot_ptr[i + 1], r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))), + divisor_limb); + n1 = n0; + } + udiv_qrnnd (quot_ptr[0], r, r, + n1 << normalization_steps, + divisor_limb); + return r >> normalization_steps; + } + } + /* No normalization needed, either because udiv_qrnnd doesn't require + it, or because DIVISOR_LIMB is already normalized. */ + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + { + quot_ptr[i] = 0; + i--; + } + + for (; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd (quot_ptr[i], r, r, n0, divisor_limb); + } + return r; + } +} diff --git a/contrib/libgmp/mpn/generic/divrem.c b/contrib/libgmp/mpn/generic/divrem.c new file mode 100644 index 0000000..1fe865a --- /dev/null +++ b/contrib/libgmp/mpn/generic/divrem.c @@ -0,0 +1,245 @@ +/* mpn_divrem -- Divide natural numbers, producing both remainder and + quotient. + +Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write + the NSIZE-DSIZE least significant quotient limbs at QP + and the DSIZE long remainder at NP. If QEXTRA_LIMBS is + non-zero, generate that many fraction bits and append them after the + other quotient limbs. + Return the most significant limb of the quotient, this is always 0 or 1. + + Preconditions: + 0. NSIZE >= DSIZE. + 1. The most significant bit of the divisor must be set. + 2. QP must either not overlap with the input operands at all, or + QP + DSIZE >= NP must hold true. (This means that it's + possible to put the quotient in the high part of NUM, right after the + remainder in NUM. + 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero. */ + +mp_limb_t +#if __STDC__ +mpn_divrem (mp_ptr qp, mp_size_t qextra_limbs, + mp_ptr np, mp_size_t nsize, + mp_srcptr dp, mp_size_t dsize) +#else +mpn_divrem (qp, qextra_limbs, np, nsize, dp, dsize) + mp_ptr qp; + mp_size_t qextra_limbs; + mp_ptr np; + mp_size_t nsize; + mp_srcptr dp; + mp_size_t dsize; +#endif +{ + mp_limb_t most_significant_q_limb = 0; + + switch (dsize) + { + case 0: + /* We are asked to divide by zero, so go ahead and do it! (To make + the compiler not remove this statement, return the value.) */ + return 1 / dsize; + + case 1: + { + mp_size_t i; + mp_limb_t n1; + mp_limb_t d; + + d = dp[0]; + n1 = np[nsize - 1]; + + if (n1 >= d) + { + n1 -= d; + most_significant_q_limb = 1; + } + + qp += qextra_limbs; + for (i = nsize - 2; i >= 0; i--) + udiv_qrnnd (qp[i], n1, n1, np[i], d); + qp -= qextra_limbs; + + for (i = qextra_limbs - 1; i >= 0; i--) + udiv_qrnnd (qp[i], n1, n1, 0, d); + + np[0] = n1; + } + break; + + case 2: + { + mp_size_t i; + mp_limb_t n1, n0, n2; + mp_limb_t d1, d0; + + np += nsize - 2; + d1 = dp[1]; + d0 = dp[0]; + n1 = np[1]; + n0 = np[0]; + + if (n1 >= d1 && (n1 > d1 || n0 >= d0)) + { + sub_ddmmss (n1, n0, n1, n0, d1, d0); + most_significant_q_limb = 1; + } + + for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) + { + mp_limb_t q; + mp_limb_t r; + + if (i >= qextra_limbs) + np--; + else + np[0] = 0; + + if (n1 == d1) + { + /* Q should be either 111..111 or 111..110. Need special + treatment of this rare case as normal division would + give overflow. */ + q = ~(mp_limb_t) 0; + + r = n0 + d1; + if (r < d1) /* Carry in the addition? */ + { + add_ssaaaa (n1, n0, r - d0, np[0], 0, d0); + qp[i] = q; + continue; + } + n1 = d0 - (d0 != 0); + n0 = -d0; + } + else + { + udiv_qrnnd (q, r, n1, n0, d1); + umul_ppmm (n1, n0, d0, q); + } + + n2 = np[0]; + q_test: + if (n1 > r || (n1 == r && n0 > n2)) + { + /* The estimated Q was too large. */ + q--; + + sub_ddmmss (n1, n0, n1, n0, 0, d0); + r += d1; + if (r >= d1) /* If not carry, test Q again. 
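The udiv_qrnnd used throughout divides a two-limb value {n1, n0} by one limb, under the precondition n1 < d so the quotient fits a limb. A sketch of that primitive via unsigned __int128 (an emulation for illustration, not the longlong.h implementation):

```c
#include <stdint.h>

/* Divide the two-limb value {n1,n0} by d; the quotient fits one limb
   because of the udiv_qrnnd precondition n1 < d. */
static uint64_t div_2by1 (uint64_t *q, uint64_t n1, uint64_t n0, uint64_t d)
{
  unsigned __int128 n = ((unsigned __int128) n1 << 64) | n0;
  *q = (uint64_t) (n / d);
  return (uint64_t) (n % d);   /* remainder, < d */
}
```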
*/ + goto q_test; + } + + qp[i] = q; + sub_ddmmss (n1, n0, r, n2, n1, n0); + } + np[1] = n1; + np[0] = n0; + } + break; + + default: + { + mp_size_t i; + mp_limb_t dX, d1, n0; + + np += nsize - dsize; + dX = dp[dsize - 1]; + d1 = dp[dsize - 2]; + n0 = np[dsize - 1]; + + if (n0 >= dX) + { + if (n0 > dX || mpn_cmp (np, dp, dsize - 1) >= 0) + { + mpn_sub_n (np, np, dp, dsize); + n0 = np[dsize - 1]; + most_significant_q_limb = 1; + } + } + + for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) + { + mp_limb_t q; + mp_limb_t n1, n2; + mp_limb_t cy_limb; + + if (i >= qextra_limbs) + { + np--; + n2 = np[dsize]; + } + else + { + n2 = np[dsize - 1]; + MPN_COPY_DECR (np + 1, np, dsize); + np[0] = 0; + } + + if (n0 == dX) + /* This might over-estimate q, but it's probably not worth + the extra code here to find out. */ + q = ~(mp_limb_t) 0; + else + { + mp_limb_t r; + + udiv_qrnnd (q, r, n0, np[dsize - 1], dX); + umul_ppmm (n1, n0, d1, q); + + while (n1 > r || (n1 == r && n0 > np[dsize - 2])) + { + q--; + r += dX; + if (r < dX) /* I.e. "carry in previous addition?" */ + break; + n1 -= n0 < d1; + n0 -= d1; + } + } + + /* Possible optimization: We already have (q * n0) and (1 * n1) + after the calculation of q. Taking advantage of that, we + could make this loop make two iterations less. */ + + cy_limb = mpn_submul_1 (np, dp, dsize, q); + + if (n2 != cy_limb) + { + mpn_add_n (np, np, dp, dsize); + q--; + } + + qp[i] = q; + n0 = np[dsize - 1]; + } + } + } + + return most_significant_q_limb; +} diff --git a/contrib/libgmp/mpn/generic/divrem_1.c b/contrib/libgmp/mpn/generic/divrem_1.c new file mode 100644 index 0000000..d213267 --- /dev/null +++ b/contrib/libgmp/mpn/generic/divrem_1.c @@ -0,0 +1,58 @@ +/* mpn_divrem_1(quot_ptr, qsize, dividend_ptr, dividend_size, divisor_limb) -- + Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. + Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR. + Return the single-limb remainder. + There are no constraints on the value of the divisor. + + QUOT_PTR and DIVIDEND_PTR might point to the same limb. + +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +#if __STDC__ +mpn_divrem_1 (mp_ptr qp, mp_size_t qsize, + mp_srcptr dividend_ptr, mp_size_t dividend_size, + mp_limb_t divisor_limb) +#else +mpn_divrem_1 (qp, qsize, dividend_ptr, dividend_size, divisor_limb) + mp_ptr qp; + mp_size_t qsize; + mp_srcptr dividend_ptr; + mp_size_t dividend_size; + mp_limb_t divisor_limb; +#endif +{ + mp_limb_t rlimb; + long i; + + /* Develop integer part of quotient. 
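mpn_divrem_1 below extends the quotient past the radix point by feeding zero limbs into udiv_qrnnd together with the running remainder. One such step, sketched with __int128: the first fraction limb of 1/3 in base 2^64.

```c
#include <stdint.h>
#include <stdio.h>

int main (void)
{
  uint64_t d = 3, r = 1;      /* integer quotient 0, remainder 1 */
  /* Bring down a zero limb: divide {r, 0} by d, as udiv_qrnnd (q, r, r, 0, d). */
  unsigned __int128 n = (unsigned __int128) r << 64;
  printf ("%016llx\n", (unsigned long long) (n / d));  /* 5555555555555555 */
  return 0;
}
```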
*/ + rlimb = mpn_divmod_1 (qp + qsize, dividend_ptr, dividend_size, divisor_limb); + + if (qsize != 0) + { + for (i = qsize - 1; i >= 0; i--) + udiv_qrnnd (qp[i], rlimb, rlimb, 0, divisor_limb); + } + return rlimb; +} diff --git a/contrib/libgmp/mpn/generic/dump.c b/contrib/libgmp/mpn/generic/dump.c new file mode 100644 index 0000000..a5831c4 --- /dev/null +++ b/contrib/libgmp/mpn/generic/dump.c @@ -0,0 +1,20 @@ +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" + +void +mpn_dump (ptr, size) + mp_srcptr ptr; + mp_size_t size; +{ + if (size == 0) + printf ("0\n"); + { + while (size) + { + size--; + printf ("%0*lX", (int) (2 * BYTES_PER_MP_LIMB), ptr[size]); + } + printf ("\n"); + } +} diff --git a/contrib/libgmp/mpn/generic/gcd.c b/contrib/libgmp/mpn/generic/gcd.c new file mode 100644 index 0000000..8c2bbf0 --- /dev/null +++ b/contrib/libgmp/mpn/generic/gcd.c @@ -0,0 +1,402 @@ +/* mpn/gcd.c: mpn_gcd for gcd of two odd integers. + +Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* Integer greatest common divisor of two unsigned integers, using + the accelerated algorithm (see reference below). + + mp_size_t mpn_gcd (vp, vsize, up, usize). + + Preconditions [U = (up, usize) and V = (vp, vsize)]: + + 1. V is odd. + 2. numbits(U) >= numbits(V). + + Both U and V are destroyed by the operation. The result is left at vp, + and its size is returned. + + Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu) + + Funding for this work has been partially provided by Conselho Nacional + de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant + 301314194-2, and was done while I was a visiting reseacher in the Instituto + de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS). + + Refer to + K. Weber, The accelerated integer GCD algorithm, ACM Transactions on + Mathematical Software, v. 21 (March), 1995, pp. 111-122. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* If MIN (usize, vsize) > ACCEL_THRESHOLD, then the accelerated algorithm is + used, otherwise the binary algorithm is used. This may be adjusted for + different architectures. */ +#ifndef ACCEL_THRESHOLD +#define ACCEL_THRESHOLD 4 +#endif + +/* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated + algorithm reduces using the bmod operation. Otherwise, the k-ary reduction + is used. 0 <= BMOD_THRESHOLD < BITS_PER_MP_LIMB. 
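Both the binary fallback in mpn_gcd and gcd_2 below rest on the binary GCD idea: strip factors of two, subtract the smaller value from the larger, repeat. A single-limb sketch, using the GCC/Clang __builtin_ctzll where the file uses count_trailing_zeros:

```c
#include <stdint.h>

/* Binary GCD on single limbs. */
static uint64_t binary_gcd (uint64_t u, uint64_t v)
{
  if (u == 0) return v;
  if (v == 0) return u;
  int shift = __builtin_ctzll (u | v);   /* common factors of two */
  u >>= __builtin_ctzll (u);             /* u is odd from here on */
  do
    {
      v >>= __builtin_ctzll (v);         /* make v odd */
      if (u > v) { uint64_t t = u; u = v; v = t; }
      v -= u;                            /* odd - odd: v is now even or zero */
    }
  while (v != 0);
  return u << shift;
}
```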
*/ +enum + { + BMOD_THRESHOLD = BITS_PER_MP_LIMB/2 + }; + +#define SIGN_BIT (~(~(mp_limb_t)0 >> 1)) + + +#define SWAP_LIMB(UL, VL) do{mp_limb_t __l=(UL);(UL)=(VL);(VL)=__l;}while(0) +#define SWAP_PTR(UP, VP) do{mp_ptr __p=(UP);(UP)=(VP);(VP)=__p;}while(0) +#define SWAP_SZ(US, VS) do{mp_size_t __s=(US);(US)=(VS);(VS)=__s;}while(0) +#define SWAP_MPN(UP, US, VP, VS) do{SWAP_PTR(UP,VP);SWAP_SZ(US,VS);}while(0) + +/* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2. + Both U and V must be odd. */ +static __gmp_inline mp_size_t +#if __STDC__ +gcd_2 (mp_ptr vp, mp_srcptr up) +#else +gcd_2 (vp, up) + mp_ptr vp; + mp_srcptr up; +#endif +{ + mp_limb_t u0, u1, v0, v1; + mp_size_t vsize; + + u0 = up[0], u1 = up[1], v0 = vp[0], v1 = vp[1]; + + while (u1 != v1 && u0 != v0) + { + unsigned long int r; + if (u1 > v1) + { + u1 -= v1 + (u0 < v0), u0 -= v0; + count_trailing_zeros (r, u0); + u0 = u1 << (BITS_PER_MP_LIMB - r) | u0 >> r; + u1 >>= r; + } + else /* u1 < v1. */ + { + v1 -= u1 + (v0 < u0), v0 -= u0; + count_trailing_zeros (r, v0); + v0 = v1 << (BITS_PER_MP_LIMB - r) | v0 >> r; + v1 >>= r; + } + } + + vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0); + + /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */ + if (u1 == v1 && u0 == v0) + return vsize; + + v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0; + vp[0] = mpn_gcd_1 (vp, vsize, v0); + + return 1; +} + +/* The function find_a finds 0 < N < 2^BITS_PER_MP_LIMB such that there exists + 0 < |D| < 2^BITS_PER_MP_LIMB, and N == D * C mod 2^(2*BITS_PER_MP_LIMB). + In the reference article, D was computed along with N, but it is better to + compute D separately as D <-- N / C mod 2^(BITS_PER_MP_LIMB + 1), treating + the result as a twos' complement signed integer. + + Initialize N1 to C mod 2^(2*BITS_PER_MP_LIMB). According to the reference + article, N2 should be initialized to 2^(2*BITS_PER_MP_LIMB), but we use + 2^(2*BITS_PER_MP_LIMB) - N1 to start the calculations within double + precision. If N2 > N1 initially, the first iteration of the while loop + will swap them. In all other situations, N1 >= N2 is maintained. */ + +static __gmp_inline mp_limb_t +#if __STDC__ +find_a (mp_srcptr cp) +#else +find_a (cp) + mp_srcptr cp; +#endif +{ + unsigned long int leading_zero_bits = 0; + + mp_limb_t n1_l = cp[0]; /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l. */ + mp_limb_t n1_h = cp[1]; + + mp_limb_t n2_l = -n1_l; /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l. */ + mp_limb_t n2_h = ~n1_h; + + /* Main loop. */ + while (n2_h) /* While N2 >= 2^BITS_PER_MP_LIMB. */ + { + /* N1 <-- N1 % N2. 
*/ + if ((SIGN_BIT >> leading_zero_bits & n2_h) == 0) + { + unsigned long int i; + count_leading_zeros (i, n2_h); + i -= leading_zero_bits, leading_zero_bits += i; + n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i; + do + { + if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l)) + n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l; + n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1; + i -= 1; + } + while (i); + } + if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l)) + n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l; + + SWAP_LIMB (n1_h, n2_h); + SWAP_LIMB (n1_l, n2_l); + } + + return n2_l; +} + +mp_size_t +#if __STDC__ +mpn_gcd (mp_ptr gp, mp_ptr vp, mp_size_t vsize, mp_ptr up, mp_size_t usize) +#else +mpn_gcd (gp, vp, vsize, up, usize) + mp_ptr gp; + mp_ptr vp; + mp_size_t vsize; + mp_ptr up; + mp_size_t usize; +#endif +{ + mp_ptr orig_vp = vp; + mp_size_t orig_vsize = vsize; + int binary_gcd_ctr; /* Number of times binary gcd will execute. */ + TMP_DECL (marker); + + TMP_MARK (marker); + + /* Use accelerated algorithm if vsize is over ACCEL_THRESHOLD. + Two EXTRA limbs for U and V are required for kary reduction. */ + if (vsize > ACCEL_THRESHOLD) + { + unsigned long int vbitsize, d; + mp_ptr orig_up = up; + mp_size_t orig_usize = usize; + mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB); + + MPN_COPY (anchor_up, orig_up, usize); + up = anchor_up; + + count_leading_zeros (d, up[usize-1]); + d = usize * BITS_PER_MP_LIMB - d; + count_leading_zeros (vbitsize, vp[vsize-1]); + vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize; + d = d - vbitsize + 1; + + /* Use bmod reduction to quickly discover whether V divides U. */ + up[usize++] = 0; /* Insert leading zero. */ + mpn_bdivmod (up, up, usize, vp, vsize, d); + + /* Now skip U/V mod 2^d and any low zero limbs. */ + d /= BITS_PER_MP_LIMB, up += d, usize -= d; + while (usize != 0 && up[0] == 0) + up++, usize--; + + if (usize == 0) /* GCD == ORIG_V. */ + goto done; + + vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB); + MPN_COPY (vp, orig_vp, vsize); + + do /* Main loop. */ + { + if (up[usize-1] & SIGN_BIT) /* U < 0; take twos' compl. */ + { + mp_size_t i; + anchor_up[0] = -up[0]; + for (i = 1; i < usize; i++) + anchor_up[i] = ~up[i]; + up = anchor_up; + } + + MPN_NORMALIZE_NOT_ZERO (up, usize); + + if ((up[0] & 1) == 0) /* Result even; remove twos. */ + { + unsigned long int r; + count_trailing_zeros (r, up[0]); + mpn_rshift (anchor_up, up, usize, r); + usize -= (anchor_up[usize-1] == 0); + } + else if (anchor_up != up) + MPN_COPY (anchor_up, up, usize); + + SWAP_MPN (anchor_up, usize, vp, vsize); + up = anchor_up; + + if (vsize <= 2) /* Kary can't handle < 2 limbs and */ + break; /* isn't efficient for == 2 limbs. */ + + d = vbitsize; + count_leading_zeros (vbitsize, vp[vsize-1]); + vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize; + d = d - vbitsize + 1; + + if (d > BMOD_THRESHOLD) /* Bmod reduction. */ + { + up[usize++] = 0; + mpn_bdivmod (up, up, usize, vp, vsize, d); + d /= BITS_PER_MP_LIMB, up += d, usize -= d; + } + else /* Kary reduction. */ + { + mp_limb_t bp[2], cp[2]; + + /* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB). */ + cp[0] = vp[0], cp[1] = vp[1]; + mpn_bdivmod (cp, cp, 2, up, 2, 2*BITS_PER_MP_LIMB); + + /* U <-- find_a (C) * U. */ + up[usize] = mpn_mul_1 (up, up, usize, find_a (cp)); + usize++; + + /* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1). 
+ bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and + bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2 */ + bp[0] = up[0], bp[1] = up[1]; + mpn_bdivmod (bp, bp, 2, vp, 2, BITS_PER_MP_LIMB); + bp[1] &= 1; /* Since V is odd, division is unnecessary. */ + + up[usize++] = 0; + if (bp[1]) /* B < 0: U <-- U + (-B) * V. */ + { + mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0]); + mpn_add_1 (up + vsize, up + vsize, usize - vsize, c); + } + else /* B >= 0: U <-- U - B * V. */ + { + mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]); + mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b); + } + + up += 2, usize -= 2; /* At least two low limbs are zero. */ + } + + /* Must remove low zero limbs before complementing. */ + while (usize != 0 && up[0] == 0) + up++, usize--; + } + while (usize); + + /* Compute GCD (ORIG_V, GCD (ORIG_U, V)). Binary will execute twice. */ + up = orig_up, usize = orig_usize; + binary_gcd_ctr = 2; + } + else + binary_gcd_ctr = 1; + + /* Finish up with the binary algorithm. Executes once or twice. */ + for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize) + { + if (usize > 2) /* First make U close to V in size. */ + { + unsigned long int vbitsize, d; + count_leading_zeros (d, up[usize-1]); + d = usize * BITS_PER_MP_LIMB - d; + count_leading_zeros (vbitsize, vp[vsize-1]); + vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize; + d = d - vbitsize - 1; + if (d != -(unsigned long int)1 && d > 2) + { + mpn_bdivmod (up, up, usize, vp, vsize, d); /* Result > 0. */ + d /= (unsigned long int)BITS_PER_MP_LIMB, up += d, usize -= d; + } + } + + /* Start binary GCD. */ + do + { + mp_size_t zeros; + + /* Make sure U is odd. */ + MPN_NORMALIZE (up, usize); + while (up[0] == 0) + up += 1, usize -= 1; + if ((up[0] & 1) == 0) + { + unsigned long int r; + count_trailing_zeros (r, up[0]); + mpn_rshift (up, up, usize, r); + usize -= (up[usize-1] == 0); + } + + /* Keep usize >= vsize. */ + if (usize < vsize) + SWAP_MPN (up, usize, vp, vsize); + + if (usize <= 2) /* Double precision. */ + { + if (vsize == 1) + vp[0] = mpn_gcd_1 (up, usize, vp[0]); + else + vsize = gcd_2 (vp, up); + break; /* Binary GCD done. */ + } + + /* Count number of low zero limbs of U - V. */ + for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; ) + continue; + + /* If U < V, swap U and V; in any case, subtract V from U. */ + if (zeros == vsize) /* Subtract done. */ + up += zeros, usize -= zeros; + else if (usize == vsize) + { + mp_size_t size = vsize; + do + size--; + while (up[size] == vp[size]); + if (up[size] < vp[size]) /* usize == vsize. */ + SWAP_PTR (up, vp); + up += zeros, usize = size + 1 - zeros; + mpn_sub_n (up, up, vp + zeros, usize); + } + else + { + mp_size_t size = vsize - zeros; + up += zeros, usize -= zeros; + if (mpn_sub_n (up, up, vp + zeros, size)) + { + while (up[size] == 0) /* Propagate borrow. */ + up[size++] = -(mp_limb_t)1; + up[size] -= 1; + } + } + } + while (usize); /* End binary GCD. */ + } + +done: + if (vp != gp) + MPN_COPY (gp, vp, vsize); + TMP_FREE (marker); + return vsize; +} diff --git a/contrib/libgmp/mpn/generic/gcd_1.c b/contrib/libgmp/mpn/generic/gcd_1.c new file mode 100644 index 0000000..ebcdfb5 --- /dev/null +++ b/contrib/libgmp/mpn/generic/gcd_1.c @@ -0,0 +1,73 @@ +/* mpn_gcd_1 -- + +Copyright (C) 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Does not work for U == 0 or V == 0. It would be tough to make it work for + V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t. */ + +mp_limb_t +mpn_gcd_1 (up, size, vlimb) + mp_srcptr up; + mp_size_t size; + mp_limb_t vlimb; +{ + mp_limb_t ulimb; + unsigned long int u_low_zero_bits, v_low_zero_bits; + + if (size > 1) + { + ulimb = mpn_mod_1 (up, size, vlimb); + if (ulimb == 0) + return vlimb; + } + else + ulimb = up[0]; + + /* Need to eliminate low zero bits. */ + count_trailing_zeros (u_low_zero_bits, ulimb); + ulimb >>= u_low_zero_bits; + + count_trailing_zeros (v_low_zero_bits, vlimb); + vlimb >>= v_low_zero_bits; + + while (ulimb != vlimb) + { + if (ulimb > vlimb) + { + ulimb -= vlimb; + do + ulimb >>= 1; + while ((ulimb & 1) == 0); + } + else /* vlimb > ulimb. */ + { + vlimb -= ulimb; + do + vlimb >>= 1; + while ((vlimb & 1) == 0); + } + } + + return ulimb << MIN (u_low_zero_bits, v_low_zero_bits); +} diff --git a/contrib/libgmp/mpn/generic/gcdext.c b/contrib/libgmp/mpn/generic/gcdext.c new file mode 100644 index 0000000..245e20a --- /dev/null +++ b/contrib/libgmp/mpn/generic/gcdext.c @@ -0,0 +1,441 @@ +/* mpn_gcdext -- Extended Greatest Common Divisor. + +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef EXTEND +#define EXTEND 1 +#endif + +#if STAT +int arr[BITS_PER_MP_LIMB]; +#endif + +#define SGN(A) (((A) < 0) ? -1 : ((A) > 0)) + +/* Idea 1: After we have performed a full division, don't shift operands back, + but instead account for the extra factors-of-2 thus introduced. + Idea 2: Simple generalization to use divide-and-conquer would give us an + algorithm that runs faster than O(n^2). + Idea 3: The input numbers need less space as the computation progresses, + while the s0 and s1 variables need more space. 
To save space, we + could make them share space, and have the latter variables grow + into the former. */ + +/* Precondition: U >= V. */ + +mp_size_t +#if EXTEND +#if __STDC__ +mpn_gcdext (mp_ptr gp, mp_ptr s0p, + mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize) +#else +mpn_gcdext (gp, s0p, up, size, vp, vsize) + mp_ptr gp; + mp_ptr s0p; + mp_ptr up; + mp_size_t size; + mp_ptr vp; + mp_size_t vsize; +#endif +#else +#if __STDC__ +mpn_gcd (mp_ptr gp, + mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize) +#else +mpn_gcd (gp, up, size, vp, vsize) + mp_ptr gp; + mp_ptr up; + mp_size_t size; + mp_ptr vp; + mp_size_t vsize; +#endif +#endif +{ + mp_limb_t uh, vh; + mp_limb_signed_t A, B, C, D; + int cnt; + mp_ptr tp, wp; +#if RECORD + mp_limb_signed_t min = 0, max = 0; +#endif +#if EXTEND + mp_ptr s1p; + mp_ptr orig_s0p = s0p; + mp_size_t ssize, orig_size = size; + TMP_DECL (mark); + + TMP_MARK (mark); + + tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB); + wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB); + s1p = (mp_ptr) TMP_ALLOC (size * BYTES_PER_MP_LIMB); + + MPN_ZERO (s0p, size); + MPN_ZERO (s1p, size); + + s0p[0] = 1; + s1p[0] = 0; + ssize = 1; +#endif + + if (size > vsize) + { + /* Normalize V (and shift up U the same amount). */ + count_leading_zeros (cnt, vp[vsize - 1]); + if (cnt != 0) + { + mp_limb_t cy; + mpn_lshift (vp, vp, vsize, cnt); + cy = mpn_lshift (up, up, size, cnt); + up[size] = cy; + size += cy != 0; + } + + mpn_divmod (up + vsize, up, size, vp, vsize); +#if EXTEND + /* This is really what it boils down to in this case... */ + s0p[0] = 0; + s1p[0] = 1; +#endif + size = vsize; + if (cnt != 0) + { + mpn_rshift (up, up, size, cnt); + mpn_rshift (vp, vp, size, cnt); + } + { + mp_ptr xp; + xp = up; up = vp; vp = xp; + } + } + + for (;;) + { + /* Figure out exact size of V. */ + vsize = size; + MPN_NORMALIZE (vp, vsize); + if (vsize <= 1) + break; + + /* Make UH be the most significant limb of U, and make VH be + corresponding bits from V. */ + uh = up[size - 1]; + vh = vp[size - 1]; + count_leading_zeros (cnt, uh); + if (cnt != 0) + { + uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt)); + vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt)); + } + +#if 0 + /* For now, only handle BITS_PER_MP_LIMB-1 bits. This makes + room for sign bit. */ + uh >>= 1; + vh >>= 1; +#endif + A = 1; + B = 0; + C = 0; + D = 1; + + for (;;) + { + mp_limb_signed_t q, T; + if (vh + C == 0 || vh + D == 0) + break; + + q = (uh + A) / (vh + C); + if (q != (uh + B) / (vh + D)) + break; + + T = A - q * C; + A = C; + C = T; + T = B - q * D; + B = D; + D = T; + T = uh - q * vh; + uh = vh; + vh = T; + } + +#if RECORD + min = MIN (A, min); min = MIN (B, min); + min = MIN (C, min); min = MIN (D, min); + max = MAX (A, max); max = MAX (B, max); + max = MAX (C, max); max = MAX (D, max); +#endif + + if (B == 0) + { + mp_limb_t qh; + mp_size_t i; + + /* This is quite rare. I.e., optimize something else! */ + + /* Normalize V (and shift up U the same amount). 
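mpn_gcdext carries the cofactors s0p/s1p through every reduction so that a Bezout coefficient for the original U survives to the end. The single-limb version of that bookkeeping is plain extended Euclid; a sketch (the cofactors can overflow int64_t for adversarial inputs, which the multi-limb storage above avoids):

```c
#include <stdint.h>

/* Returns g = gcd(u, v) and sets *s so that s * u == g (mod v0), where
   v0 is the original v.  Same cofactor recurrence mpn_gcdext maintains. */
static uint64_t gcdext_1 (uint64_t u, uint64_t v, int64_t *s)
{
  int64_t s0 = 1, s1 = 0;
  while (v != 0)
    {
      uint64_t q = u / v;
      uint64_t t = u - q * v;
      int64_t ts = s0 - (int64_t) q * s1;
      u = v; v = t;
      s0 = s1; s1 = ts;
    }
  *s = s0;
  return u;
}
```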
*/ + count_leading_zeros (cnt, vp[vsize - 1]); + if (cnt != 0) + { + mp_limb_t cy; + mpn_lshift (vp, vp, vsize, cnt); + cy = mpn_lshift (up, up, size, cnt); + up[size] = cy; + size += cy != 0; + } + + qh = mpn_divmod (up + vsize, up, size, vp, vsize); +#if EXTEND + MPN_COPY (tp, s0p, ssize); + for (i = 0; i < size - vsize; i++) + { + mp_limb_t cy; + cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]); + if (cy != 0) + tp[ssize++] = cy; + } + if (qh != 0) + { + mp_limb_t cy; + abort (); + /* XXX since qh == 1, mpn_addmul_1 is overkill */ + cy = mpn_addmul_1 (tp + size - vsize, s1p, ssize, qh); + if (cy != 0) + tp[ssize++] = cy; + } +#if 0 + MPN_COPY (s0p, s1p, ssize); /* should be old ssize, kind of */ + MPN_COPY (s1p, tp, ssize); +#else + { + mp_ptr xp; + xp = s0p; s0p = s1p; s1p = xp; + xp = s1p; s1p = tp; tp = xp; + } +#endif +#endif + size = vsize; + if (cnt != 0) + { + mpn_rshift (up, up, size, cnt); + mpn_rshift (vp, vp, size, cnt); + } + + { + mp_ptr xp; + xp = up; up = vp; vp = xp; + } + MPN_NORMALIZE (up, size); + } + else + { + /* T = U*A + V*B + W = U*C + V*D + U = T + V = W */ + + if (SGN(A) == SGN(B)) /* should be different sign */ + abort (); + if (SGN(C) == SGN(D)) /* should be different sign */ + abort (); +#if STAT + { mp_limb_t x; + x = ABS (A) | ABS (B) | ABS (C) | ABS (D); + count_leading_zeros (cnt, x); + arr[BITS_PER_MP_LIMB - cnt]++; } +#endif + if (A == 0) + { + if (B != 1) abort (); + MPN_COPY (tp, vp, size); + } + else + { + if (A < 0) + { + mpn_mul_1 (tp, vp, size, B); + mpn_submul_1 (tp, up, size, -A); + } + else + { + mpn_mul_1 (tp, up, size, A); + mpn_submul_1 (tp, vp, size, -B); + } + } + if (C < 0) + { + mpn_mul_1 (wp, vp, size, D); + mpn_submul_1 (wp, up, size, -C); + } + else + { + mpn_mul_1 (wp, up, size, C); + mpn_submul_1 (wp, vp, size, -D); + } + + { + mp_ptr xp; + xp = tp; tp = up; up = xp; + xp = wp; wp = vp; vp = xp; + } + +#if EXTEND + { mp_limb_t cy; + MPN_ZERO (tp, orig_size); + if (A == 0) + { + if (B != 1) abort (); + MPN_COPY (tp, s1p, ssize); + } + else + { + if (A < 0) + { + cy = mpn_mul_1 (tp, s1p, ssize, B); + cy += mpn_addmul_1 (tp, s0p, ssize, -A); + } + else + { + cy = mpn_mul_1 (tp, s0p, ssize, A); + cy += mpn_addmul_1 (tp, s1p, ssize, -B); + } + if (cy != 0) + tp[ssize++] = cy; + } + MPN_ZERO (wp, orig_size); + if (C < 0) + { + cy = mpn_mul_1 (wp, s1p, ssize, D); + cy += mpn_addmul_1 (wp, s0p, ssize, -C); + } + else + { + cy = mpn_mul_1 (wp, s0p, ssize, C); + cy += mpn_addmul_1 (wp, s1p, ssize, -D); + } + if (cy != 0) + wp[ssize++] = cy; + } + { + mp_ptr xp; + xp = tp; tp = s0p; s0p = xp; + xp = wp; wp = s1p; s1p = xp; + } +#endif +#if 0 /* Is it a win to remove multiple zeros here? 
*/ + MPN_NORMALIZE (up, size); +#else + if (up[size - 1] == 0) + size--; +#endif + } + } + +#if RECORD + printf ("min: %ld\n", min); + printf ("max: %ld\n", max); +#endif + + if (vsize == 0) + { + if (gp != up) + MPN_COPY (gp, up, size); +#if EXTEND + if (orig_s0p != s0p) + MPN_COPY (orig_s0p, s0p, ssize); +#endif + TMP_FREE (mark); + return size; + } + else + { + mp_limb_t vl, ul, t; +#if EXTEND + mp_limb_t cy; + mp_size_t i; +#endif + vl = vp[0]; +#if EXTEND + t = mpn_divmod_1 (wp, up, size, vl); + MPN_COPY (tp, s0p, ssize); + for (i = 0; i < size; i++) + { + cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]); + if (cy != 0) + tp[ssize++] = cy; + } +#if 0 + MPN_COPY (s0p, s1p, ssize); + MPN_COPY (s1p, tp, ssize); +#else + { + mp_ptr xp; + xp = s0p; s0p = s1p; s1p = xp; + xp = s1p; s1p = tp; tp = xp; + } +#endif +#else + t = mpn_mod_1 (up, size, vl); +#endif + ul = vl; + vl = t; + while (vl != 0) + { + mp_limb_t t; +#if EXTEND + mp_limb_t q, cy; + q = ul / vl; + t = ul - q*vl; + + MPN_COPY (tp, s0p, ssize); + cy = mpn_addmul_1 (tp, s1p, ssize, q); + if (cy != 0) + tp[ssize++] = cy; +#if 0 + MPN_COPY (s0p, s1p, ssize); + MPN_COPY (s1p, tp, ssize); +#else + { + mp_ptr xp; + xp = s0p; s0p = s1p; s1p = xp; + xp = s1p; s1p = tp; tp = xp; + } +#endif + +#else + t = ul % vl; +#endif + ul = vl; + vl = t; + } + gp[0] = ul; +#if EXTEND + if (orig_s0p != s0p) + MPN_COPY (orig_s0p, s0p, ssize); +#endif + TMP_FREE (mark); + return 1; + } +} diff --git a/contrib/libgmp/mpn/generic/get_str.c b/contrib/libgmp/mpn/generic/get_str.c new file mode 100644 index 0000000..0e7fc60 --- /dev/null +++ b/contrib/libgmp/mpn/generic/get_str.c @@ -0,0 +1,211 @@ +/* mpn_get_str -- Convert a MSIZE long limb vector pointed to by MPTR + to a printable string in STR in base BASE. + +Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Convert the limb vector pointed to by MPTR and MSIZE long to a + char array, using base BASE for the result array. Store the + result in the character array STR. STR must point to an array with + space for the largest possible number represented by a MSIZE long + limb vector + 1 extra character. + + The result is NOT in Ascii, to convert it to printable format, add + '0' or 'A' depending on the base and range. + + Return the number of digits in the result string. + This may include some leading zeros. + + The limb vector pointed to by MPTR is clobbered. 
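The general (non-power-of-2) branch of mpn_get_str described above peels digits off with repeated division by big_base, producing them least significant first. The same scheme on a single machine word, as a sketch:

```c
/* Base conversion by repeated division.  Digits are raw values, not
   ASCII, matching the "result is NOT in Ascii" note above, and come out
   least-significant-digit first. */
static int to_base (unsigned long long n, int base, unsigned char *digits)
{
  int len = 0;
  do
    {
      digits[len++] = n % base;   /* peel off the lowest digit */
      n /= base;
    }
  while (n != 0);
  return len;
}
```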
*/ + +size_t +mpn_get_str (str, base, mptr, msize) + unsigned char *str; + int base; + mp_ptr mptr; + mp_size_t msize; +{ + mp_limb_t big_base; +#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME + int normalization_steps; +#endif +#if UDIV_TIME > 2 * UMUL_TIME + mp_limb_t big_base_inverted; +#endif + unsigned int dig_per_u; + mp_size_t out_len; + register unsigned char *s; + + big_base = __mp_bases[base].big_base; + + s = str; + + /* Special case zero, as the code below doesn't handle it. */ + if (msize == 0) + { + s[0] = 0; + return 1; + } + + if ((base & (base - 1)) == 0) + { + /* The base is a power of 2. Make conversion from most + significant side. */ + mp_limb_t n1, n0; + register int bits_per_digit = big_base; + register int x; + register int bit_pos; + register int i; + + n1 = mptr[msize - 1]; + count_leading_zeros (x, n1); + + /* BIT_POS should be R when input ends in least sign. nibble, + R + bits_per_digit * n when input ends in n:th least significant + nibble. */ + + { + int bits; + + bits = BITS_PER_MP_LIMB * msize - x; + x = bits % bits_per_digit; + if (x != 0) + bits += bits_per_digit - x; + bit_pos = bits - (msize - 1) * BITS_PER_MP_LIMB; + } + + /* Fast loop for bit output. */ + i = msize - 1; + for (;;) + { + bit_pos -= bits_per_digit; + while (bit_pos >= 0) + { + *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1); + bit_pos -= bits_per_digit; + } + i--; + if (i < 0) + break; + n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1); + n1 = mptr[i]; + bit_pos += BITS_PER_MP_LIMB; + *s++ = n0 | (n1 >> bit_pos); + } + + *s = 0; + + return s - str; + } + else + { + /* General case. The base is not a power of 2. Make conversion + from least significant end. */ + + /* If udiv_qrnnd only handles divisors with the most significant bit + set, prepare BIG_BASE for being a divisor by shifting it to the + left exactly enough to set the most significant bit. */ +#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME + count_leading_zeros (normalization_steps, big_base); + big_base <<= normalization_steps; +#if UDIV_TIME > 2 * UMUL_TIME + /* Get the fixed-point approximation to 1/(BIG_BASE << NORMALIZATION_STEPS). */ + big_base_inverted = __mp_bases[base].big_base_inverted; +#endif +#endif + + dig_per_u = __mp_bases[base].chars_per_limb; + out_len = ((size_t) msize * BITS_PER_MP_LIMB + * __mp_bases[base].chars_per_bit_exactly) + 1; + s += out_len; + + while (msize != 0) + { + int i; + mp_limb_t n0, n1; + +#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME + /* If we shifted BIG_BASE above, shift the dividend too, to get + the right quotient. We need to do this every loop, + since the intermediate quotients are OK, but the quotient from + one turn in the loop is going to be the dividend in the + next turn, and the dividend needs to be up-shifted. */ + if (normalization_steps != 0) + { + n0 = mpn_lshift (mptr, mptr, msize, normalization_steps); + + /* If the shifting gave a carry out limb, store it and + increase the length. */ + if (n0 != 0) + { + mptr[msize] = n0; + msize++; + } + } +#endif + + /* Divide the number at TP with BIG_BASE to get a quotient and a + remainder. The remainder is our new digit in base BIG_BASE. 
*/ + i = msize - 1; + n1 = mptr[i]; + + if (n1 >= big_base) + n1 = 0; + else + { + msize--; + i--; + } + + for (; i >= 0; i--) + { + n0 = mptr[i]; +#if UDIV_TIME > 2 * UMUL_TIME + udiv_qrnnd_preinv (mptr[i], n1, n1, n0, big_base, big_base_inverted); +#else + udiv_qrnnd (mptr[i], n1, n1, n0, big_base); +#endif + } + +#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME + /* If we shifted above (at previous UDIV_NEEDS_NORMALIZATION tests) + the remainder will be up-shifted here. Compensate. */ + n1 >>= normalization_steps; +#endif + + /* Convert N1 from BIG_BASE to a string of digits in BASE + using single precision operations. */ + for (i = dig_per_u - 1; i >= 0; i--) + { + *--s = n1 % base; + n1 /= base; + if (n1 == 0 && msize == 0) + break; + } + } + + while (s != str) + *--s = 0; + return out_len; + } +} diff --git a/contrib/libgmp/mpn/generic/gmp-mparam.h b/contrib/libgmp/mpn/generic/gmp-mparam.h new file mode 100644 index 0000000..7c88557 --- /dev/null +++ b/contrib/libgmp/mpn/generic/gmp-mparam.h @@ -0,0 +1,27 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 diff --git a/contrib/libgmp/mpn/generic/hamdist.c b/contrib/libgmp/mpn/generic/hamdist.c new file mode 100644 index 0000000..2190b63 --- /dev/null +++ b/contrib/libgmp/mpn/generic/hamdist.c @@ -0,0 +1,88 @@ +/* mpn_hamdist -- + +Copyright (C) 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#if defined __GNUC__ +#if defined __sparc_v9__ && BITS_PER_MP_LIMB == 64 +#define popc_limb(a) \ + ({ \ + DItype __res; \ + asm ("popc %1,%0" : "=r" (__res) : "rI" (a)); \ + __res; \ + }) +#endif +#endif + +#ifndef popc_limb + +/* Cool population count of a mp_limb_t. 
+ You have to figure out how this works, I won't tell you! */ + +static inline unsigned int +popc_limb (x) + mp_limb_t x; +{ +#if BITS_PER_MP_LIMB == 64 + /* We have to go into some trouble to define these constants. + (For mp_limb_t being `long long'.) */ + mp_limb_t cnst; + cnst = 0x55555555L | ((mp_limb_t) 0x55555555L << BITS_PER_MP_LIMB/2); + x = ((x & ~cnst) >> 1) + (x & cnst); + cnst = 0x33333333L | ((mp_limb_t) 0x33333333L << BITS_PER_MP_LIMB/2); + x = ((x & ~cnst) >> 2) + (x & cnst); + cnst = 0x0f0f0f0fL | ((mp_limb_t) 0x0f0f0f0fL << BITS_PER_MP_LIMB/2); + x = ((x >> 4) + x) & cnst; + x = ((x >> 8) + x); + x = ((x >> 16) + x); + x = ((x >> 32) + x) & 0xff; +#endif +#if BITS_PER_MP_LIMB == 32 + x = ((x >> 1) & 0x55555555L) + (x & 0x55555555L); + x = ((x >> 2) & 0x33333333L) + (x & 0x33333333L); + x = ((x >> 4) + x) & 0x0f0f0f0fL; + x = ((x >> 8) + x); + x = ((x >> 16) + x) & 0xff; +#endif + return x; +} +#endif + +unsigned long int +#if __STDC__ +mpn_hamdist (mp_srcptr up, mp_srcptr vp, mp_size_t size) +#else +mpn_hamdist (up, vp, size) + register mp_srcptr up; + register mp_srcptr vp; + register mp_size_t size; +#endif +{ + unsigned long int hamdist; + mp_size_t i; + + hamdist = 0; + for (i = 0; i < size; i++) + hamdist += popc_limb (up[i] ^ vp[i]); + + return hamdist; +} diff --git a/contrib/libgmp/mpn/generic/inlines.c b/contrib/libgmp/mpn/generic/inlines.c new file mode 100644 index 0000000..dca305e --- /dev/null +++ b/contrib/libgmp/mpn/generic/inlines.c @@ -0,0 +1,3 @@ +#define _FORCE_INLINES +#define _EXTERN_INLINE /* empty */ +#include "gmp.h" diff --git a/contrib/libgmp/mpn/generic/lshift.c b/contrib/libgmp/mpn/generic/lshift.c new file mode 100644 index 0000000..e244bc5 --- /dev/null +++ b/contrib/libgmp/mpn/generic/lshift.c @@ -0,0 +1,87 @@ +/* mpn_lshift -- Shift left low level. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left + and store the USIZE least significant digits of the result at WP. + Return the bits shifted out from the most significant digit. + + Argument constraints: + 1. 0 < CNT < BITS_PER_MP_LIMB + 2. If the result is to be written over the input, WP must be >= UP. 
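The popc_limb comment above withholds the explanation, so here is one as a 32-bit sketch: it is a SWAR reduction in which each step sums adjacent bit-fields in parallel, doubling the field width.

```c
#include <stdint.h>

/* Population count of a 32-bit word, same technique as popc_limb above. */
static unsigned popcount32 (uint32_t x)
{
  x = (x & 0x55555555) + ((x >> 1) & 0x55555555);  /* 2-bit sums of bit pairs */
  x = (x & 0x33333333) + ((x >> 2) & 0x33333333);  /* 4-bit sums */
  x = (x + (x >> 4)) & 0x0f0f0f0f;                 /* 8-bit sums, each <= 8 */
  x += x >> 8;                                     /* fold byte sums together */
  x += x >> 16;
  return x & 0xff;                                 /* total fits in 6 bits */
}
```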
+*/ + +mp_limb_t +#if __STDC__ +mpn_lshift (register mp_ptr wp, + register mp_srcptr up, mp_size_t usize, + register unsigned int cnt) +#else +mpn_lshift (wp, up, usize, cnt) + register mp_ptr wp; + register mp_srcptr up; + mp_size_t usize; + register unsigned int cnt; +#endif +{ + register mp_limb_t high_limb, low_limb; + register unsigned sh_1, sh_2; + register mp_size_t i; + mp_limb_t retval; + +#ifdef DEBUG + if (usize == 0 || cnt == 0) + abort (); +#endif + + sh_1 = cnt; +#if 0 + if (sh_1 == 0) + { + if (wp != up) + { + /* Copy from high end to low end, to allow specified input/output + overlapping. */ + for (i = usize - 1; i >= 0; i--) + wp[i] = up[i]; + } + return 0; + } +#endif + + wp += 1; + sh_2 = BITS_PER_MP_LIMB - sh_1; + i = usize - 1; + low_limb = up[i]; + retval = low_limb >> sh_2; + high_limb = low_limb; + while (--i >= 0) + { + low_limb = up[i]; + wp[i] = (high_limb << sh_1) | (low_limb >> sh_2); + high_limb = low_limb; + } + wp[i] = high_limb << sh_1; + + return retval; +} diff --git a/contrib/libgmp/mpn/generic/mod_1.c b/contrib/libgmp/mpn/generic/mod_1.c new file mode 100644 index 0000000..314d11b --- /dev/null +++ b/contrib/libgmp/mpn/generic/mod_1.c @@ -0,0 +1,197 @@ +/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) -- + Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. + Return the single-limb remainder. + There are no constraints on the value of the divisor. + +Copyright (C) 1991, 1993, 1994, Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef UMUL_TIME +#define UMUL_TIME 1 +#endif + +#ifndef UDIV_TIME +#define UDIV_TIME UMUL_TIME +#endif + +/* FIXME: We should be using invert_limb (or invert_normalized_limb) + here (not udiv_qrnnd). */ + +mp_limb_t +#if __STDC__ +mpn_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size, + mp_limb_t divisor_limb) +#else +mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb) + mp_srcptr dividend_ptr; + mp_size_t dividend_size; + mp_limb_t divisor_limb; +#endif +{ + mp_size_t i; + mp_limb_t n1, n0, r; + int dummy; + + /* Botch: Should this be handled at all? Rely on callers? */ + if (dividend_size == 0) + return 0; + + /* If multiplication is much faster than division, and the + dividend is large, pre-invert the divisor, and use + only multiplications in the inner loop. */ + + /* This test should be read: + Does it ever help to use udiv_qrnnd_preinv? + && Does what we save compensate for the inversion overhead? 
*/ + if (UDIV_TIME > (2 * UMUL_TIME + 6) + && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) + { + int normalization_steps; + + count_leading_zeros (normalization_steps, divisor_limb); + if (normalization_steps != 0) + { + mp_limb_t divisor_limb_inverted; + + divisor_limb <<= normalization_steps; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + most significant bit (with weight 2**N) implicit. */ + + /* Special case for DIVISOR_LIMB == 100...000. */ + if (divisor_limb << 1 == 0) + divisor_limb_inverted = ~(mp_limb_t) 0; + else + udiv_qrnnd (divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MP_LIMB - normalization_steps); + + /* Possible optimization: + if (r == 0 + && divisor_limb > ((n1 << normalization_steps) + | (dividend_ptr[dividend_size - 2] >> ...))) + ...one division less... */ + + for (i = dividend_size - 2; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd_preinv (dummy, r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))), + divisor_limb, divisor_limb_inverted); + n1 = n0; + } + udiv_qrnnd_preinv (dummy, r, r, + n1 << normalization_steps, + divisor_limb, divisor_limb_inverted); + return r >> normalization_steps; + } + else + { + mp_limb_t divisor_limb_inverted; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + most significant bit (with weight 2**N) implicit. */ + + /* Special case for DIVISOR_LIMB == 100...000. */ + if (divisor_limb << 1 == 0) + divisor_limb_inverted = ~(mp_limb_t) 0; + else + udiv_qrnnd (divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + i--; + + for (; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd_preinv (dummy, r, r, + n0, divisor_limb, divisor_limb_inverted); + } + return r; + } + } + else + { + if (UDIV_NEEDS_NORMALIZATION) + { + int normalization_steps; + + count_leading_zeros (normalization_steps, divisor_limb); + if (normalization_steps != 0) + { + divisor_limb <<= normalization_steps; + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MP_LIMB - normalization_steps); + + /* Possible optimization: + if (r == 0 + && divisor_limb > ((n1 << normalization_steps) + | (dividend_ptr[dividend_size - 2] >> ...))) + ...one division less... */ + + for (i = dividend_size - 2; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd (dummy, r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))), + divisor_limb); + n1 = n0; + } + udiv_qrnnd (dummy, r, r, + n1 << normalization_steps, + divisor_limb); + return r >> normalization_steps; + } + } + /* No normalization needed, either because udiv_qrnnd doesn't require + it, or because DIVISOR_LIMB is already normalized. */ + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + i--; + + for (; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd (dummy, r, r, n0, divisor_limb); + } + return r; + } +} diff --git a/contrib/libgmp/mpn/generic/mul.c b/contrib/libgmp/mpn/generic/mul.c new file mode 100644 index 0000000..dcf8cb4 --- /dev/null +++ b/contrib/libgmp/mpn/generic/mul.c @@ -0,0 +1,152 @@ +/* mpn_mul -- Multiply two natural numbers. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. 
+ +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) + and v (pointed to by VP, with VSIZE limbs), and store the result at + PRODP. USIZE + VSIZE limbs are always stored, but if the input + operands are normalized. Return the most significant limb of the + result. + + NOTE: The space pointed to by PRODP is overwritten before finished + with U and V, so overlap is an error. + + Argument constraints: + 1. USIZE >= VSIZE. + 2. PRODP != UP and PRODP != VP, i.e. the destination + must be distinct from the multiplier and the multiplicand. */ + +/* If KARATSUBA_THRESHOLD is not already defined, define it to a + value which is good on most machines. */ +#ifndef KARATSUBA_THRESHOLD +#define KARATSUBA_THRESHOLD 32 +#endif + +mp_limb_t +#if __STDC__ +mpn_mul (mp_ptr prodp, + mp_srcptr up, mp_size_t usize, + mp_srcptr vp, mp_size_t vsize) +#else +mpn_mul (prodp, up, usize, vp, vsize) + mp_ptr prodp; + mp_srcptr up; + mp_size_t usize; + mp_srcptr vp; + mp_size_t vsize; +#endif +{ + mp_ptr prod_endp = prodp + usize + vsize - 1; + mp_limb_t cy; + mp_ptr tspace; + TMP_DECL (marker); + + if (vsize < KARATSUBA_THRESHOLD) + { + /* Handle simple cases with traditional multiplication. + + This is the most critical code of the entire function. All + multiplies rely on this, both small and huge. Small ones arrive + here immediately. Huge ones arrive here as this is the base case + for Karatsuba's recursive algorithm below. */ + mp_size_t i; + mp_limb_t cy_limb; + mp_limb_t v_limb; + + if (vsize == 0) + return 0; + + /* Multiply by the first limb in V separately, as the result can be + stored (not added) to PROD. We also avoid a loop for zeroing. */ + v_limb = vp[0]; + if (v_limb <= 1) + { + if (v_limb == 1) + MPN_COPY (prodp, up, usize); + else + MPN_ZERO (prodp, usize); + cy_limb = 0; + } + else + cy_limb = mpn_mul_1 (prodp, up, usize, v_limb); + + prodp[usize] = cy_limb; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + U with one limb from V, and add it to PROD. 
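The loop comment just above is slightly loose: each outer iteration multiplies one limb of V by all of U, accumulating one shifted row of the schoolbook scheme with mpn_addmul_1; only the first row is stored outright through mpn_mul_1, which is what lets the code skip pre-zeroing PROD, and rows whose V limb is 0 or 1 avoid the multiplication entirely. A self-checking toy version with 16-bit limbs, plain loops standing in for mul_1/addmul_1 (not the library code):

#include <stdio.h>
#include <stdint.h>

int main (void)
{
  uint16_t u[2] = { 0x1234, 0xabcd };    /* U = 0xabcd1234 */
  uint16_t v[2] = { 0xffff, 0x0002 };    /* V = 0x0002ffff */
  uint16_t p[4] = { 0 };
  uint64_t check = (uint64_t) 0xabcd1234u * 0x0002ffffu;
  int i, j;

  for (i = 0; i < 2; i++)                /* one row per limb of V */
    {
      uint32_t cy = 0;
      for (j = 0; j < 2; j++)
        {
          cy += (uint32_t) u[j] * v[i] + p[i + j];
          p[i + j] = (uint16_t) cy;      /* low half stays */
          cy >>= 16;                     /* high half carries on */
        }
      p[i + 2] = (uint16_t) cy;          /* the row's cy_limb */
    }

  printf ("%016llx\n%04x%04x%04x%04x\n", (unsigned long long) check,
          p[3], p[2], p[1], p[0]);       /* the two lines agree */
  return 0;
}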
*/ + for (i = 1; i < vsize; i++) + { + v_limb = vp[i]; + if (v_limb <= 1) + { + cy_limb = 0; + if (v_limb == 1) + cy_limb = mpn_add_n (prodp, prodp, up, usize); + } + else + cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb); + + prodp[usize] = cy_limb; + prodp++; + } + return cy_limb; + } + + TMP_MARK (marker); + + tspace = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB); + MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace); + + prodp += vsize; + up += vsize; + usize -= vsize; + if (usize >= vsize) + { + mp_ptr tp = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB); + do + { + MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace); + cy = mpn_add_n (prodp, prodp, tp, vsize); + mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy); + prodp += vsize; + up += vsize; + usize -= vsize; + } + while (usize >= vsize); + } + + /* True: usize < vsize. */ + + /* Make life simple: Recurse. */ + + if (usize != 0) + { + mpn_mul (tspace, vp, vsize, up, usize); + cy = mpn_add_n (prodp, prodp, tspace, vsize); + mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy); + } + + TMP_FREE (marker); + return *prod_endp; +} diff --git a/contrib/libgmp/mpn/generic/mul_1.c b/contrib/libgmp/mpn/generic/mul_1.c new file mode 100644 index 0000000..2de680a --- /dev/null +++ b/contrib/libgmp/mpn/generic/mul_1.c @@ -0,0 +1,59 @@ +/* mpn_mul_1 -- Multiply a limb vector with a single limb and + store the product in a second limb vector. + +Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +mpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + mp_size_t s1_size; + register mp_limb_t s2_limb; +{ + register mp_limb_t cy_limb; + register mp_size_t j; + register mp_limb_t prod_high, prod_low; + + /* The loop counter and index J goes from -S1_SIZE to -1. This way + the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + res_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb) + prod_high; + + res_ptr[j] = prod_low; + } + while (++j != 0); + + return cy_limb; +} diff --git a/contrib/libgmp/mpn/generic/mul_n.c b/contrib/libgmp/mpn/generic/mul_n.c new file mode 100644 index 0000000..b38e8ad --- /dev/null +++ b/contrib/libgmp/mpn/generic/mul_n.c @@ -0,0 +1,401 @@ +/* mpn_mul_n -- Multiply two natural numbers of length n. + +Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
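One subtlety in mpn_mul_1 above: the update cy_limb = (prod_low < cy_limb) + prod_high cannot itself wrap around, because the high word of a limb product is bounded away from the limb maximum. With B = 2^BITS_PER_MP_LIMB,

  (B-1)^2 = (B-2)\,B + 1 \;\Longrightarrow\; \mathrm{prod\_high} \le B - 2,

so adding the 0-or-1 carry from the low word keeps cy_limb <= B - 1.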
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), + both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are + always stored. Return the most significant limb. + + Argument constraints: + 1. PRODP != UP and PRODP != VP, i.e. the destination + must be distinct from the multiplier and the multiplicand. */ + +/* If KARATSUBA_THRESHOLD is not already defined, define it to a + value which is good on most machines. */ +#ifndef KARATSUBA_THRESHOLD +#define KARATSUBA_THRESHOLD 32 +#endif + +/* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */ +#if KARATSUBA_THRESHOLD < 2 +#undef KARATSUBA_THRESHOLD +#define KARATSUBA_THRESHOLD 2 +#endif + +/* Handle simple cases with traditional multiplication. + + This is the most critical code of multiplication. All multiplies rely + on this, both small and huge. Small ones arrive here immediately. Huge + ones arrive here as this is the base case for Karatsuba's recursive + algorithm below. */ + +void +#if __STDC__ +impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) +#else +impn_mul_n_basecase (prodp, up, vp, size) + mp_ptr prodp; + mp_srcptr up; + mp_srcptr vp; + mp_size_t size; +#endif +{ + mp_size_t i; + mp_limb_t cy_limb; + mp_limb_t v_limb; + + /* Multiply by the first limb in V separately, as the result can be + stored (not added) to PROD. We also avoid a loop for zeroing. */ + v_limb = vp[0]; + if (v_limb <= 1) + { + if (v_limb == 1) + MPN_COPY (prodp, up, size); + else + MPN_ZERO (prodp, size); + cy_limb = 0; + } + else + cy_limb = mpn_mul_1 (prodp, up, size, v_limb); + + prodp[size] = cy_limb; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + U with one limb from V, and add it to PROD. */ + for (i = 1; i < size; i++) + { + v_limb = vp[i]; + if (v_limb <= 1) + { + cy_limb = 0; + if (v_limb == 1) + cy_limb = mpn_add_n (prodp, prodp, up, size); + } + else + cy_limb = mpn_addmul_1 (prodp, up, size, v_limb); + + prodp[size] = cy_limb; + prodp++; + } +} + +void +#if __STDC__ +impn_mul_n (mp_ptr prodp, + mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace) +#else +impn_mul_n (prodp, up, vp, size, tspace) + mp_ptr prodp; + mp_srcptr up; + mp_srcptr vp; + mp_size_t size; + mp_ptr tspace; +#endif +{ + if ((size & 1) != 0) + { + /* The size is odd, the code code below doesn't handle that. + Multiply the least significant (size - 1) limbs with a recursive + call, and handle the most significant limb of S1 and S2 + separately. */ + /* A slightly faster way to do this would be to make the Karatsuba + code below behave as if the size were even, and let it check for + odd size in the end. I.e., in essence move this code to the end. 
+ Doing so would save us a recursive call, and potentially make the + stack grow a lot less. */ + + mp_size_t esize = size - 1; /* even size */ + mp_limb_t cy_limb; + + MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace); + cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]); + prodp[esize + esize] = cy_limb; + cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]); + + prodp[esize + size] = cy_limb; + } + else + { + /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. + + Split U in two pieces, U1 and U0, such that + U = U0 + U1*(B**n), + and V in V1 and V0, such that + V = V0 + V1*(B**n). + + UV is then computed recursively using the identity + + 2n n n n + UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V + 1 1 1 0 0 1 0 0 + + Where B = 2**BITS_PER_MP_LIMB. */ + + mp_size_t hsize = size >> 1; + mp_limb_t cy; + int negflg; + + /*** Product H. ________________ ________________ + |_____U1 x V1____||____U0 x V0_____| */ + /* Put result in upper part of PROD and pass low part of TSPACE + as new TSPACE. */ + MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace); + + /*** Product M. ________________ + |_(U1-U0)(V0-V1)_| */ + if (mpn_cmp (up + hsize, up, hsize) >= 0) + { + mpn_sub_n (prodp, up + hsize, up, hsize); + negflg = 0; + } + else + { + mpn_sub_n (prodp, up, up + hsize, hsize); + negflg = 1; + } + if (mpn_cmp (vp + hsize, vp, hsize) >= 0) + { + mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize); + negflg ^= 1; + } + else + { + mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize); + /* No change of NEGFLG. */ + } + /* Read temporary operands from low part of PROD. + Put result in low part of TSPACE using upper part of TSPACE + as new TSPACE. */ + MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size); + + /*** Add/copy product H. */ + MPN_COPY (prodp + hsize, prodp + size, hsize); + cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize); + + /*** Add product M (if NEGFLG M is a negative number). */ + if (negflg) + cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size); + else + cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); + + /*** Product L. ________________ ________________ + |________________||____U0 x V0_____| */ + /* Read temporary operands from low part of PROD. + Put result in low part of TSPACE using upper part of TSPACE + as new TSPACE. */ + MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size); + + /*** Add/copy Product L (twice). */ + + cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); + if (cy) + mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy); + + MPN_COPY (prodp, tspace, hsize); + cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize); + if (cy) + mpn_add_1 (prodp + size, prodp + size, size, 1); + } +} + +void +#if __STDC__ +impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size) +#else +impn_sqr_n_basecase (prodp, up, size) + mp_ptr prodp; + mp_srcptr up; + mp_size_t size; +#endif +{ + mp_size_t i; + mp_limb_t cy_limb; + mp_limb_t v_limb; + + /* Multiply by the first limb in V separately, as the result can be + stored (not added) to PROD. We also avoid a loop for zeroing. */ + v_limb = up[0]; + if (v_limb <= 1) + { + if (v_limb == 1) + MPN_COPY (prodp, up, size); + else + MPN_ZERO (prodp, size); + cy_limb = 0; + } + else + cy_limb = mpn_mul_1 (prodp, up, size, v_limb); + + prodp[size] = cy_limb; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + U with one limb from V, and add it to PROD. 
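The ASCII identity in impn_mul_n above survives flattening poorly; in conventional notation, with B = 2^BITS_PER_MP_LIMB and n = hsize, the recursion computes

  UV = (B^{2n} + B^{n})\, U_1 V_1 \;+\; B^{n} (U_1 - U_0)(V_0 - V_1) \;+\; (B^{n} + 1)\, U_0 V_0,

where U = U_1 B^n + U_0 and V = V_1 B^n + V_0. Only three half-size products appear (U_1 V_1, U_0 V_0, and the difference product), against four in the plain split, which yields the O(n^{\log_2 3}) \approx O(n^{1.585}) running time of Karatsuba's method.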
*/ + for (i = 1; i < size; i++) + { + v_limb = up[i]; + if (v_limb <= 1) + { + cy_limb = 0; + if (v_limb == 1) + cy_limb = mpn_add_n (prodp, prodp, up, size); + } + else + cy_limb = mpn_addmul_1 (prodp, up, size, v_limb); + + prodp[size] = cy_limb; + prodp++; + } +} + +void +#if __STDC__ +impn_sqr_n (mp_ptr prodp, + mp_srcptr up, mp_size_t size, mp_ptr tspace) +#else +impn_sqr_n (prodp, up, size, tspace) + mp_ptr prodp; + mp_srcptr up; + mp_size_t size; + mp_ptr tspace; +#endif +{ + if ((size & 1) != 0) + { + /* The size is odd, the code code below doesn't handle that. + Multiply the least significant (size - 1) limbs with a recursive + call, and handle the most significant limb of S1 and S2 + separately. */ + /* A slightly faster way to do this would be to make the Karatsuba + code below behave as if the size were even, and let it check for + odd size in the end. I.e., in essence move this code to the end. + Doing so would save us a recursive call, and potentially make the + stack grow a lot less. */ + + mp_size_t esize = size - 1; /* even size */ + mp_limb_t cy_limb; + + MPN_SQR_N_RECURSE (prodp, up, esize, tspace); + cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]); + prodp[esize + esize] = cy_limb; + cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]); + + prodp[esize + size] = cy_limb; + } + else + { + mp_size_t hsize = size >> 1; + mp_limb_t cy; + + /*** Product H. ________________ ________________ + |_____U1 x U1____||____U0 x U0_____| */ + /* Put result in upper part of PROD and pass low part of TSPACE + as new TSPACE. */ + MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace); + + /*** Product M. ________________ + |_(U1-U0)(U0-U1)_| */ + if (mpn_cmp (up + hsize, up, hsize) >= 0) + { + mpn_sub_n (prodp, up + hsize, up, hsize); + } + else + { + mpn_sub_n (prodp, up, up + hsize, hsize); + } + + /* Read temporary operands from low part of PROD. + Put result in low part of TSPACE using upper part of TSPACE + as new TSPACE. */ + MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size); + + /*** Add/copy product H. */ + MPN_COPY (prodp + hsize, prodp + size, hsize); + cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize); + + /*** Add product M (if NEGFLG M is a negative number). */ + cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size); + + /*** Product L. ________________ ________________ + |________________||____U0 x U0_____| */ + /* Read temporary operands from low part of PROD. + Put result in low part of TSPACE using upper part of TSPACE + as new TSPACE. */ + MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size); + + /*** Add/copy Product L (twice). */ + + cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); + if (cy) + mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy); + + MPN_COPY (prodp, tspace, hsize); + cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize); + if (cy) + mpn_add_1 (prodp + size, prodp + size, size, 1); + } +} + +/* This should be made into an inline function in gmp.h. 
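A detail of impn_sqr_n above: with V = U the middle Karatsuba term becomes (U_1 - U_0)(U_0 - U_1) = -(U_1 - U_0)^2, which is never positive, so the squaring path can always subtract product M and needs none of the NEGFLG bookkeeping of the general code.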
*/ +inline void +#if __STDC__ +mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) +#else +mpn_mul_n (prodp, up, vp, size) + mp_ptr prodp; + mp_srcptr up; + mp_srcptr vp; + mp_size_t size; +#endif +{ + TMP_DECL (marker); + TMP_MARK (marker); + if (up == vp) + { + if (size < KARATSUBA_THRESHOLD) + { + impn_sqr_n_basecase (prodp, up, size); + } + else + { + mp_ptr tspace; + tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB); + impn_sqr_n (prodp, up, size, tspace); + } + } + else + { + if (size < KARATSUBA_THRESHOLD) + { + impn_mul_n_basecase (prodp, up, vp, size); + } + else + { + mp_ptr tspace; + tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB); + impn_mul_n (prodp, up, vp, size, tspace); + } + } + TMP_FREE (marker); +} diff --git a/contrib/libgmp/mpn/generic/perfsqr.c b/contrib/libgmp/mpn/generic/perfsqr.c new file mode 100644 index 0000000..5a6e2af --- /dev/null +++ b/contrib/libgmp/mpn/generic/perfsqr.c @@ -0,0 +1,138 @@ +/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square, + zero otherwise. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef UMUL_TIME +#define UMUL_TIME 1 +#endif + +#ifndef UDIV_TIME +#define UDIV_TIME UMUL_TIME +#endif + +#if BITS_PER_MP_LIMB == 32 +#define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x 13 x ... x 29 */ +#define PP_INVERTED 0x53E5645CL +#endif + +#if BITS_PER_MP_LIMB == 64 +#define PP 0xE221F97C30E94E1DL /* 3 x 5 x 7 x 11 x 13 x ... x 53 */ +#define PP_INVERTED 0x21CFE6CFC938B36BL +#endif + +/* sq_res_0x100[x mod 0x100] == 1 iff x mod 0x100 is a quadratic residue + modulo 0x100. */ +static unsigned char const sq_res_0x100[0x100] = +{ + 1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +}; + +int +#if __STDC__ +mpn_perfect_square_p (mp_srcptr up, mp_size_t usize) +#else +mpn_perfect_square_p (up, usize) + mp_srcptr up; + mp_size_t usize; +#endif +{ + mp_limb_t rem; + mp_ptr root_ptr; + int res; + TMP_DECL (marker); + + /* The first test excludes 55/64 (85.9%) of the perfect square candidates + in O(1) time. 
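The residue filters of mpn_perfect_square_p can be regenerated mechanically: a perfect square must be a quadratic residue modulo any modulus, so mark (k*k) mod m for every k. A sketch, not part of the import, that rebuilds the sq_res_0x100 table above and the mod-29 bitmask used below:

#include <stdio.h>

int main (void)
{
  unsigned char tab[256] = { 0 };
  unsigned long mask = 0;
  int k;

  for (k = 0; k < 256; k++)        /* squares mod 2^8 */
    tab[k * k & 0xff] = 1;

  for (k = 0; k < 29; k++)         /* squares mod 29 */
    mask |= 1UL << (k * k % 29);

  printf ("%d 0x%lX\n", tab[0x61], mask);
  /* prints "1 0x13D122F3": 0x61 = 97 is a residue (49^2 = 9*256 + 97),
     and the mask matches the constant tested against rem % 29 below. */
  return 0;
}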
*/ + if ((sq_res_0x100[(unsigned int) up[0] % 0x100] & 1) == 0) + return 0; + +#if defined (PP) + /* The second test excludes 30652543/30808063 (99.5%) of the remaining + perfect square candidates in O(n) time. */ + + /* Firstly, compute REM = A mod PP. */ + if (UDIV_TIME > (2 * UMUL_TIME + 6)) + rem = mpn_preinv_mod_1 (up, usize, (mp_limb_t) PP, (mp_limb_t) PP_INVERTED); + else + rem = mpn_mod_1 (up, usize, (mp_limb_t) PP); + + /* Now decide if REM is a quadratic residue modulo the factors in PP. */ + + /* If A is just a few limbs, computing the square root does not take long + time, so things might run faster if we limit this loop according to the + size of A. */ + +#if BITS_PER_MP_LIMB == 64 + if (((0x12DD703303AED3L >> rem % 53) & 1) == 0) + return 0; + if (((0x4351B2753DFL >> rem % 47) & 1) == 0) + return 0; + if (((0x35883A3EE53L >> rem % 43) & 1) == 0) + return 0; + if (((0x1B382B50737L >> rem % 41) & 1) == 0) + return 0; + if (((0x165E211E9BL >> rem % 37) & 1) == 0) + return 0; + if (((0x121D47B7L >> rem % 31) & 1) == 0) + return 0; +#endif + if (((0x13D122F3L >> rem % 29) & 1) == 0) + return 0; + if (((0x5335FL >> rem % 23) & 1) == 0) + return 0; + if (((0x30AF3L >> rem % 19) & 1) == 0) + return 0; + if (((0x1A317L >> rem % 17) & 1) == 0) + return 0; + if (((0x161BL >> rem % 13) & 1) == 0) + return 0; + if (((0x23BL >> rem % 11) & 1) == 0) + return 0; + if (((0x017L >> rem % 7) & 1) == 0) + return 0; + if (((0x13L >> rem % 5) & 1) == 0) + return 0; + if (((0x3L >> rem % 3) & 1) == 0) + return 0; +#endif + + TMP_MARK (marker); + + /* For the third and last test, we finally compute the square root, + to make sure we've really got a perfect square. */ + root_ptr = (mp_ptr) TMP_ALLOC ((usize + 1) / 2 * BYTES_PER_MP_LIMB); + + /* Iff mpn_sqrtrem returns zero, the square is perfect. */ + res = ! mpn_sqrtrem (root_ptr, NULL, up, usize); + TMP_FREE (marker); + return res; +} diff --git a/contrib/libgmp/mpn/generic/popcount.c b/contrib/libgmp/mpn/generic/popcount.c new file mode 100644 index 0000000..c48573a --- /dev/null +++ b/contrib/libgmp/mpn/generic/popcount.c @@ -0,0 +1,87 @@ +/* popcount.c + +Copyright (C) 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#if defined __GNUC__ +#if defined __sparc_v9__ && BITS_PER_MP_LIMB == 64 +#define popc_limb(a) \ + ({ \ + DItype __res; \ + asm ("popc %1,%0" : "=r" (__res) : "rI" (a)); \ + __res; \ + }) +#endif +#endif + +#ifndef popc_limb + +/* Cool population count of a mp_limb_t. + You have to figure out how this works, I won't tell you! */ + +static inline unsigned int +popc_limb (x) + mp_limb_t x; +{ +#if BITS_PER_MP_LIMB == 64 + /* We have to go into some trouble to define these constants. 
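Those constants are the masks of the classic bit-slice population count, the same popc_limb as in hamdist.c earlier: adjacent 1-bit fields are added into 2-bit sums, 2-bit sums into 4-bit sums, and so on, the masking keeping neighbouring sums from colliding until the total (at most 32 or 64) fits in the low byte. A quick self-contained check of the 32-bit variant:

#include <stdio.h>
#include <stdint.h>

static unsigned int popc32 (uint32_t x)
{
  x = ((x >> 1) & 0x55555555u) + (x & 0x55555555u);  /* 16 two-bit sums */
  x = ((x >> 2) & 0x33333333u) + (x & 0x33333333u);  /* 8 four-bit sums */
  x = ((x >> 4) + x) & 0x0f0f0f0fu;                  /* 4 byte sums */
  x = (x >> 8) + x;
  x = ((x >> 16) + x) & 0xff;                        /* grand total */
  return x;
}

int main (void)
{
  uint32_t v = 0xdeadbeefu;
  unsigned int naive = 0, i;

  for (i = 0; i < 32; i++)
    naive += (v >> i) & 1;
  printf ("%u %u\n", popc32 (v), naive);   /* prints "24 24" */
  return 0;
}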
+ (For mp_limb_t being `long long'.) */ + mp_limb_t cnst; + cnst = 0x55555555L | ((mp_limb_t) 0x55555555L << BITS_PER_MP_LIMB/2); + x = ((x & ~cnst) >> 1) + (x & cnst); + cnst = 0x33333333L | ((mp_limb_t) 0x33333333L << BITS_PER_MP_LIMB/2); + x = ((x & ~cnst) >> 2) + (x & cnst); + cnst = 0x0f0f0f0fL | ((mp_limb_t) 0x0f0f0f0fL << BITS_PER_MP_LIMB/2); + x = ((x >> 4) + x) & cnst; + x = ((x >> 8) + x); + x = ((x >> 16) + x); + x = ((x >> 32) + x) & 0xff; +#endif +#if BITS_PER_MP_LIMB == 32 + x = ((x >> 1) & 0x55555555L) + (x & 0x55555555L); + x = ((x >> 2) & 0x33333333L) + (x & 0x33333333L); + x = ((x >> 4) + x) & 0x0f0f0f0fL; + x = ((x >> 8) + x); + x = ((x >> 16) + x) & 0xff; +#endif + return x; +} +#endif + +unsigned long int +#if __STDC__ +mpn_popcount (register mp_srcptr p, register mp_size_t size) +#else +mpn_popcount (p, size) + register mp_srcptr p; + register mp_size_t size; +#endif +{ + unsigned long int popcnt; + mp_size_t i; + + popcnt = 0; + for (i = 0; i < size; i++) + popcnt += popc_limb (p[i]); + + return popcnt; +} diff --git a/contrib/libgmp/mpn/generic/pre_mod_1.c b/contrib/libgmp/mpn/generic/pre_mod_1.c new file mode 100644 index 0000000..92d413b --- /dev/null +++ b/contrib/libgmp/mpn/generic/pre_mod_1.c @@ -0,0 +1,69 @@ +/* mpn_preinv_mod_1 (dividend_ptr, dividend_size, divisor_limb, + divisor_limb_inverted) -- + Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by the normalized DIVISOR_LIMB. + DIVISOR_LIMB_INVERTED should be 2^(2*BITS_PER_MP_LIMB) / DIVISOR_LIMB + + - 2^BITS_PER_MP_LIMB. + Return the single-limb remainder. + +Copyright (C) 1991, 1993, 1994, Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
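Restating the header above as a formula: for a normalized divisor d (most significant bit set) and B = 2^BITS_PER_MP_LIMB, the precomputed value is

  \mathrm{divisor\_limb\_inverted} = \lfloor (B^2 - B d) / d \rfloor = \lfloor B^2 / d \rfloor - B,

which is exactly what the udiv_qrnnd (..., -divisor_limb, 0, divisor_limb) call in mpn_mod_1 earlier computes, -d being B - d in unsigned arithmetic. udiv_qrnnd_preinv then replaces each true division in the loop by two multiplications and a few adjustments, which is the trade the UDIV_TIME > 2 * UMUL_TIME tests are probing.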
*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef UMUL_TIME +#define UMUL_TIME 1 +#endif + +#ifndef UDIV_TIME +#define UDIV_TIME UMUL_TIME +#endif + +mp_limb_t +#if __STDC__ +mpn_preinv_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size, + mp_limb_t divisor_limb, mp_limb_t divisor_limb_inverted) +#else +mpn_preinv_mod_1 (dividend_ptr, dividend_size, divisor_limb, divisor_limb_inverted) + mp_srcptr dividend_ptr; + mp_size_t dividend_size; + mp_limb_t divisor_limb; + mp_limb_t divisor_limb_inverted; +#endif +{ + mp_size_t i; + mp_limb_t n0, r; + int dummy; + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + i--; + + for (; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd_preinv (dummy, r, r, n0, divisor_limb, divisor_limb_inverted); + } + return r; +} diff --git a/contrib/libgmp/mpn/generic/random2.c b/contrib/libgmp/mpn/generic/random2.c new file mode 100644 index 0000000..2954608 --- /dev/null +++ b/contrib/libgmp/mpn/generic/random2.c @@ -0,0 +1,93 @@ +/* mpn_random2 -- Generate random numbers with relatively long strings + of ones and zeroes. Suitable for border testing. + +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#if defined (__hpux) || defined (alpha__) || defined (__svr4__) || defined (__SVR4) +/* HPUX lacks random(). DEC OSF/1 1.2 random() returns a double. */ +long mrand48 (); +static inline long +random () +{ + return mrand48 (); +} +#else +long random (); +#endif + +/* It's a bit tricky to get this right, so please test the code well + if you hack with it. Some early versions of the function produced + random numbers with the leading limb == 0, and some versions never + made the most significant bit set. */ + +void +mpn_random2 (res_ptr, size) + mp_ptr res_ptr; + mp_size_t size; +{ + int n_bits; + int bit_pos; + mp_size_t limb_pos; + unsigned int ran; + mp_limb_t limb; + + limb = 0; + + /* Start off in a random bit position in the most significant limb. */ + bit_pos = random () & (BITS_PER_MP_LIMB - 1); + + /* Least significant bit of RAN chooses string of ones/string of zeroes. + Make most significant limb be non-zero by setting bit 0 of RAN. */ + ran = random () | 1; + + for (limb_pos = size - 1; limb_pos >= 0; ) + { + n_bits = (ran >> 1) % BITS_PER_MP_LIMB + 1; + if ((ran & 1) != 0) + { + /* Generate a string of ones. */ + if (n_bits >= bit_pos) + { + res_ptr[limb_pos--] = limb | ((((mp_limb_t) 2) << bit_pos) - 1); + bit_pos += BITS_PER_MP_LIMB; + limb = (~(mp_limb_t) 0) << (bit_pos - n_bits); + } + else + { + limb |= ((((mp_limb_t) 1) << n_bits) - 1) << (bit_pos - n_bits + 1); + } + } + else + { + /* Generate a string of zeroes. 
*/ + if (n_bits >= bit_pos) + { + res_ptr[limb_pos--] = limb; + limb = 0; + bit_pos += BITS_PER_MP_LIMB; + } + } + bit_pos -= n_bits; + ran = random (); + } +} diff --git a/contrib/libgmp/mpn/generic/rshift.c b/contrib/libgmp/mpn/generic/rshift.c new file mode 100644 index 0000000..804f9be --- /dev/null +++ b/contrib/libgmp/mpn/generic/rshift.c @@ -0,0 +1,88 @@ +/* mpn_rshift -- Shift right a low-level natural-number integer. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right + and store the USIZE least significant limbs of the result at WP. + The bits shifted out to the right are returned. + + Argument constraints: + 1. 0 < CNT < BITS_PER_MP_LIMB + 2. If the result is to be written over the input, WP must be <= UP. +*/ + +mp_limb_t +#if __STDC__ +mpn_rshift (register mp_ptr wp, + register mp_srcptr up, mp_size_t usize, + register unsigned int cnt) +#else +mpn_rshift (wp, up, usize, cnt) + register mp_ptr wp; + register mp_srcptr up; + mp_size_t usize; + register unsigned int cnt; +#endif +{ + register mp_limb_t high_limb, low_limb; + register unsigned sh_1, sh_2; + register mp_size_t i; + mp_limb_t retval; + +#ifdef DEBUG + if (usize == 0 || cnt == 0) + abort (); +#endif + + sh_1 = cnt; + +#if 0 + if (sh_1 == 0) + { + if (wp != up) + { + /* Copy from low end to high end, to allow specified input/output + overlapping. */ + for (i = 0; i < usize; i++) + wp[i] = up[i]; + } + return usize; + } +#endif + + wp -= 1; + sh_2 = BITS_PER_MP_LIMB - sh_1; + high_limb = up[0]; + retval = high_limb << sh_2; + low_limb = high_limb; + + for (i = 1; i < usize; i++) + { + high_limb = up[i]; + wp[i] = (low_limb >> sh_1) | (high_limb << sh_2); + low_limb = high_limb; + } + wp[i] = low_limb >> sh_1; + + return retval; +} diff --git a/contrib/libgmp/mpn/generic/scan0.c b/contrib/libgmp/mpn/generic/scan0.c new file mode 100644 index 0000000..d6f6580 --- /dev/null +++ b/contrib/libgmp/mpn/generic/scan0.c @@ -0,0 +1,62 @@ +/* mpn_scan0 -- Scan from a given bit position for the next clear bit. + +Copyright (C) 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. 
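mpn_rshift above mirrors mpn_lshift: it walks from the low end upward, hence the in-place requirement WP <= UP, and the bits shifted out are returned left-aligned in a limb (up[0] << (BITS_PER_MP_LIMB - cnt)). An in-place sketch in the same spirit, again requiring 0 < cnt < 32:

#include <stdio.h>
#include <stdint.h>

int main (void)
{
  uint32_t a[2] = { 0x0000000du, 0x00000001u };  /* 0x1_0000000d */
  unsigned int cnt = 3;
  uint32_t out = a[0] << (32 - cnt);   /* dropped bits, left-aligned */

  a[0] = (a[0] >> cnt) | (a[1] << (32 - cnt));   /* low end first */
  a[1] >>= cnt;

  printf ("%08x %08x out=%08x\n", a[1], a[0], out);
  /* 00000000 20000001 out=a0000000: value >> 3, dropped bits 101 */
  return 0;
}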
See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Design issues: + 1. What if starting_bit is not within U? Caller's problem? + 2. Bit index should be 'unsigned'? + + Argument constraints: + 1. U must sooner ot later have a limb with a clear bit. + */ + +unsigned long int +#if __STDC__ +mpn_scan0 (register mp_srcptr up, + register unsigned long int starting_bit) +#else +mpn_scan0 (up, starting_bit) + register mp_srcptr up; + register unsigned long int starting_bit; +#endif +{ + mp_size_t starting_word; + mp_limb_t alimb; + int cnt; + mp_srcptr p; + + /* Start at the word implied by STARTING_BIT. */ + starting_word = starting_bit / BITS_PER_MP_LIMB; + p = up + starting_word; + alimb = ~*p++; + + /* Mask off any bits before STARTING_BIT in the first limb. */ + alimb &= - (mp_limb_t) 1 << (starting_bit % BITS_PER_MP_LIMB); + + while (alimb == 0) + alimb = ~*p++; + + count_leading_zeros (cnt, alimb & -alimb); + return (p - up) * BITS_PER_MP_LIMB - 1 - cnt; +} diff --git a/contrib/libgmp/mpn/generic/scan1.c b/contrib/libgmp/mpn/generic/scan1.c new file mode 100644 index 0000000..c95d090 --- /dev/null +++ b/contrib/libgmp/mpn/generic/scan1.c @@ -0,0 +1,62 @@ +/* mpn_scan1 -- Scan from a given bit position for the next set bit. + +Copyright (C) 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Design issues: + 1. What if starting_bit is not within U? Caller's problem? + 2. Bit index should be 'unsigned'? + + Argument constraints: + 1. U must sooner ot later have a limb != 0. + */ + +unsigned long int +#if __STDC__ +mpn_scan1 (register mp_srcptr up, + register unsigned long int starting_bit) +#else +mpn_scan1 (up, starting_bit) + register mp_srcptr up; + register unsigned long int starting_bit; +#endif +{ + mp_size_t starting_word; + mp_limb_t alimb; + int cnt; + mp_srcptr p; + + /* Start at the word implied by STARTING_BIT. */ + starting_word = starting_bit / BITS_PER_MP_LIMB; + p = up + starting_word; + alimb = *p++; + + /* Mask off any bits before STARTING_BIT in the first limb. 
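Both scans finish the same way: alimb & -alimb isolates the lowest set bit (negation flips every bit above it), and count_leading_zeros on that one-bit value gives its distance from the top, which (p - up) * BITS_PER_MP_LIMB - 1 - cnt converts into an absolute bit index. A 32-bit illustration with p - up = 1:

#include <stdio.h>
#include <stdint.h>

int main (void)
{
  uint32_t x = 0x00a0b000u;        /* lowest set bit is bit 12 */
  uint32_t low = x & -x;           /* isolates it: 0x00001000 */
  unsigned int cnt = 0;

  while (!(low & 0x80000000u))     /* a portable count_leading_zeros */
    {
      cnt++;
      low <<= 1;
    }
  printf ("bit %u\n", 1 * 32 - 1 - cnt);   /* prints "bit 12" */
  return 0;
}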
*/ + alimb &= - (mp_limb_t) 1 << (starting_bit % BITS_PER_MP_LIMB); + + while (alimb == 0) + alimb = *p++; + + count_leading_zeros (cnt, alimb & -alimb); + return (p - up) * BITS_PER_MP_LIMB - 1 - cnt; +} diff --git a/contrib/libgmp/mpn/generic/set_str.c b/contrib/libgmp/mpn/generic/set_str.c new file mode 100644 index 0000000..424fad3 --- /dev/null +++ b/contrib/libgmp/mpn/generic/set_str.c @@ -0,0 +1,154 @@ +/* mpn_set_str (mp_ptr res_ptr, const char *str, size_t str_len, int base) + -- Convert a STR_LEN long base BASE byte string pointed to by STR to a + limb vector pointed to by RES_PTR. Return the number of limbs in + RES_PTR. + +Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +mp_size_t +mpn_set_str (xp, str, str_len, base) + mp_ptr xp; + const unsigned char *str; + size_t str_len; + int base; +{ + mp_size_t size; + mp_limb_t big_base; + int indigits_per_limb; + mp_limb_t res_digit; + + big_base = __mp_bases[base].big_base; + indigits_per_limb = __mp_bases[base].chars_per_limb; + +/* size = str_len / indigits_per_limb + 1; */ + + size = 0; + + if ((base & (base - 1)) == 0) + { + /* The base is a power of 2. Read the input string from + least to most significant character/digit. */ + + const unsigned char *s; + int next_bitpos; + int bits_per_indigit = big_base; + + res_digit = 0; + next_bitpos = 0; + + for (s = str + str_len - 1; s >= str; s--) + { + int inp_digit = *s; + + res_digit |= (mp_limb_t) inp_digit << next_bitpos; + next_bitpos += bits_per_indigit; + if (next_bitpos >= BITS_PER_MP_LIMB) + { + xp[size++] = res_digit; + next_bitpos -= BITS_PER_MP_LIMB; + res_digit = inp_digit >> (bits_per_indigit - next_bitpos); + } + } + + if (res_digit != 0) + xp[size++] = res_digit; + } + else + { + /* General case. The base is not a power of 2. */ + + size_t i; + int j; + mp_limb_t cy_limb; + + for (i = indigits_per_limb; i < str_len; i += indigits_per_limb) + { + res_digit = *str++; + if (base == 10) + { /* This is a common case. + Help the compiler to avoid multiplication. */ + for (j = 1; j < indigits_per_limb; j++) + res_digit = res_digit * 10 + *str++; + } + else + { + for (j = 1; j < indigits_per_limb; j++) + res_digit = res_digit * base + *str++; + } + + if (size == 0) + { + if (res_digit != 0) + { + xp[0] = res_digit; + size = 1; + } + } + else + { + cy_limb = mpn_mul_1 (xp, xp, size, big_base); + cy_limb += mpn_add_1 (xp, xp, size, res_digit); + if (cy_limb != 0) + xp[size++] = cy_limb; + } + } + + big_base = base; + res_digit = *str++; + if (base == 10) + { /* This is a common case. + Help the compiler to avoid multiplication. 
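The general path of mpn_set_str above works in two gears: chars_per_limb raw digits are gathered into one limb with single-precision arithmetic, and that group is folded into the result with one multiply by big_base (mpn_mul_1) plus one add (mpn_add_1); the code after the main loop redoes this for the final, possibly shorter group with a correspondingly reduced big_base. A toy base-10 version with 32-bit limbs, plain loops standing in for the mpn calls:

#include <stdio.h>
#include <stdint.h>

int main (void)
{
  const char *str = "184467440737095516";  /* two groups of 9 digits */
  uint32_t xp[4] = { 0 };
  const uint32_t big_base = 1000000000u;   /* 10^9, chars_per_limb = 9 */
  int size = 0, i, j;

  for (i = 0; i < 18; i += 9)
    {
      uint64_t cy = 0;
      for (j = 0; j < 9; j++)              /* gather one digit group */
        cy = cy * 10 + (uint64_t) (str[i + j] - '0');
      for (j = 0; j < size; j++)           /* xp = xp * big_base + group */
        {
          cy += (uint64_t) xp[j] * big_base;
          xp[j] = (uint32_t) cy;
          cy >>= 32;
        }
      while (cy != 0)                      /* append the leftover limbs */
        {
          xp[size++] = (uint32_t) cy;
          cy >>= 32;
        }
    }

  printf ("%llu\n",
          (unsigned long long) (((uint64_t) xp[1] << 32) | xp[0]));
  /* echoes 184467440737095516 (here size == 2) */
  return 0;
}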
*/ + for (j = 1; j < str_len - (i - indigits_per_limb); j++) + { + res_digit = res_digit * 10 + *str++; + big_base *= 10; + } + } + else + { + for (j = 1; j < str_len - (i - indigits_per_limb); j++) + { + res_digit = res_digit * base + *str++; + big_base *= base; + } + } + + if (size == 0) + { + if (res_digit != 0) + { + xp[0] = res_digit; + size = 1; + } + } + else + { + cy_limb = mpn_mul_1 (xp, xp, size, big_base); + cy_limb += mpn_add_1 (xp, xp, size, res_digit); + if (cy_limb != 0) + xp[size++] = cy_limb; + } + } + + return size; +} diff --git a/contrib/libgmp/mpn/generic/sqrtrem.c b/contrib/libgmp/mpn/generic/sqrtrem.c new file mode 100644 index 0000000..539480d --- /dev/null +++ b/contrib/libgmp/mpn/generic/sqrtrem.c @@ -0,0 +1,498 @@ +/* mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) + + Write the square root of {OP_PTR, OP_SIZE} at ROOT_PTR. + Write the remainder at REM_PTR, if REM_PTR != NULL. + Return the size of the remainder. + (The size of the root is always half of the size of the operand.) + + OP_PTR and ROOT_PTR may not point to the same object. + OP_PTR and REM_PTR may point to the same object. + + If REM_PTR is NULL, only the root is computed and the return value of + the function is 0 if OP is a perfect square, and *any* non-zero number + otherwise. + +Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* This code is just correct if "unsigned char" has at least 8 bits. It + doesn't help to use CHAR_BIT from limits.h, as the real problem is + the static arrays. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Square root algorithm: + + 1. Shift OP (the input) to the left an even number of bits s.t. there + are an even number of words and either (or both) of the most + significant bits are set. This way, sqrt(OP) has exactly half as + many words as OP, and has its most significant bit set. + + 2. Get a 9-bit approximation to sqrt(OP) using the pre-computed tables. + This approximation is used for the first single-precision + iterations of Newton's method, yielding a full-word approximation + to sqrt(OP). + + 3. Perform multiple-precision Newton iteration until we have the + exact result. Only about half of the input operand is used in + this calculation, as the square root is perfectly determinable + from just the higher half of a number. */ + +/* Define this macro for IEEE P854 machines with a fast sqrt instruction. */ +#if defined __GNUC__ && ! 
defined __SOFT_FLOAT + +#if defined __sparc__ +#define SQRT(a) \ + ({ \ + double __sqrt_res; \ + asm ("fsqrtd %1,%0" : "=f" (__sqrt_res) : "f" (a)); \ + __sqrt_res; \ + }) +#endif + +#if defined __HAVE_68881__ +#define SQRT(a) \ + ({ \ + double __sqrt_res; \ + asm ("fsqrtx %1,%0" : "=f" (__sqrt_res) : "f" (a)); \ + __sqrt_res; \ + }) +#endif + +#if defined __hppa +#define SQRT(a) \ + ({ \ + double __sqrt_res; \ + asm ("fsqrt,dbl %1,%0" : "=fx" (__sqrt_res) : "fx" (a)); \ + __sqrt_res; \ + }) +#endif + +#if defined _ARCH_PWR2 +#define SQRT(a) \ + ({ \ + double __sqrt_res; \ + asm ("fsqrt %0,%1" : "=f" (__sqrt_res) : "f" (a)); \ + __sqrt_res; \ + }) +#endif + +#endif + +#ifndef SQRT + +/* Tables for initial approximation of the square root. These are + indexed with bits 1-8 of the operand for which the square root is + calculated, where bit 0 is the most significant non-zero bit. I.e. + the most significant one-bit is not used, since that per definition + is one. Likewise, the tables don't return the highest bit of the + result. That bit must be inserted by or:ing the returned value with + 0x100. This way, we get a 9-bit approximation from 8-bit tables! */ + +/* Table to be used for operands with an even total number of bits. + (Exactly as in the decimal system there are similarities between the + square root of numbers with the same initial digits and an even + difference in the total number of digits. Consider the square root + of 1, 10, 100, 1000, ...) */ +static unsigned char even_approx_tab[256] = +{ + 0x6a, 0x6a, 0x6b, 0x6c, 0x6c, 0x6d, 0x6e, 0x6e, + 0x6f, 0x70, 0x71, 0x71, 0x72, 0x73, 0x73, 0x74, + 0x75, 0x75, 0x76, 0x77, 0x77, 0x78, 0x79, 0x79, + 0x7a, 0x7b, 0x7b, 0x7c, 0x7d, 0x7d, 0x7e, 0x7f, + 0x80, 0x80, 0x81, 0x81, 0x82, 0x83, 0x83, 0x84, + 0x85, 0x85, 0x86, 0x87, 0x87, 0x88, 0x89, 0x89, + 0x8a, 0x8b, 0x8b, 0x8c, 0x8d, 0x8d, 0x8e, 0x8f, + 0x8f, 0x90, 0x90, 0x91, 0x92, 0x92, 0x93, 0x94, + 0x94, 0x95, 0x96, 0x96, 0x97, 0x97, 0x98, 0x99, + 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9c, 0x9d, 0x9e, + 0x9e, 0x9f, 0xa0, 0xa0, 0xa1, 0xa1, 0xa2, 0xa3, + 0xa3, 0xa4, 0xa4, 0xa5, 0xa6, 0xa6, 0xa7, 0xa7, + 0xa8, 0xa9, 0xa9, 0xaa, 0xaa, 0xab, 0xac, 0xac, + 0xad, 0xad, 0xae, 0xaf, 0xaf, 0xb0, 0xb0, 0xb1, + 0xb2, 0xb2, 0xb3, 0xb3, 0xb4, 0xb5, 0xb5, 0xb6, + 0xb6, 0xb7, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xba, + 0xbb, 0xbb, 0xbc, 0xbd, 0xbd, 0xbe, 0xbe, 0xbf, + 0xc0, 0xc0, 0xc1, 0xc1, 0xc2, 0xc2, 0xc3, 0xc3, + 0xc4, 0xc5, 0xc5, 0xc6, 0xc6, 0xc7, 0xc7, 0xc8, + 0xc9, 0xc9, 0xca, 0xca, 0xcb, 0xcb, 0xcc, 0xcc, + 0xcd, 0xce, 0xce, 0xcf, 0xcf, 0xd0, 0xd0, 0xd1, + 0xd1, 0xd2, 0xd3, 0xd3, 0xd4, 0xd4, 0xd5, 0xd5, + 0xd6, 0xd6, 0xd7, 0xd7, 0xd8, 0xd9, 0xd9, 0xda, + 0xda, 0xdb, 0xdb, 0xdc, 0xdc, 0xdd, 0xdd, 0xde, + 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe1, 0xe2, 0xe2, + 0xe3, 0xe3, 0xe4, 0xe4, 0xe5, 0xe5, 0xe6, 0xe6, + 0xe7, 0xe7, 0xe8, 0xe8, 0xe9, 0xea, 0xea, 0xeb, + 0xeb, 0xec, 0xec, 0xed, 0xed, 0xee, 0xee, 0xef, + 0xef, 0xf0, 0xf0, 0xf1, 0xf1, 0xf2, 0xf2, 0xf3, + 0xf3, 0xf4, 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf7, + 0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, + 0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, 0xff, +}; + +/* Table to be used for operands with an odd total number of bits. + (Further comments before previous table.) 
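Both tables can be rebuilt from first principles: index i stands for the nine leading operand bits v = 0x100 + i (the top bit being implicit), and each stored byte appears to be the low eight bits of floor(sqrt(512 * v)) for the even table, respectively floor(sqrt(256 * v)) for the odd table that follows; that is, a nine-bit root estimate with its guaranteed top bit stripped. A generator sketch, assuming a correctly rounded libm sqrt (link with -lm):

#include <stdio.h>
#include <math.h>

int main (void)
{
  int i;

  for (i = 0; i < 256; i++)
    {
      int even = (int) sqrt ((0x100 + i) * 512.0) - 0x100;
      int odd  = (int) sqrt ((0x100 + i) * 256.0) - 0x100;
      printf ("%3d: even 0x%02x  odd 0x%02x\n", i, even, odd);
    }
  /* first line: "  0: even 0x6a  odd 0x00", matching the tables */
  return 0;
}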
*/ +static unsigned char odd_approx_tab[256] = +{ + 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, + 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, + 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, + 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f, + 0x0f, 0x10, 0x10, 0x10, 0x11, 0x11, 0x12, 0x12, + 0x13, 0x13, 0x14, 0x14, 0x15, 0x15, 0x16, 0x16, + 0x16, 0x17, 0x17, 0x18, 0x18, 0x19, 0x19, 0x1a, + 0x1a, 0x1b, 0x1b, 0x1b, 0x1c, 0x1c, 0x1d, 0x1d, + 0x1e, 0x1e, 0x1f, 0x1f, 0x20, 0x20, 0x20, 0x21, + 0x21, 0x22, 0x22, 0x23, 0x23, 0x23, 0x24, 0x24, + 0x25, 0x25, 0x26, 0x26, 0x27, 0x27, 0x27, 0x28, + 0x28, 0x29, 0x29, 0x2a, 0x2a, 0x2a, 0x2b, 0x2b, + 0x2c, 0x2c, 0x2d, 0x2d, 0x2d, 0x2e, 0x2e, 0x2f, + 0x2f, 0x30, 0x30, 0x30, 0x31, 0x31, 0x32, 0x32, + 0x32, 0x33, 0x33, 0x34, 0x34, 0x35, 0x35, 0x35, + 0x36, 0x36, 0x37, 0x37, 0x37, 0x38, 0x38, 0x39, + 0x39, 0x39, 0x3a, 0x3a, 0x3b, 0x3b, 0x3b, 0x3c, + 0x3c, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e, 0x3f, 0x3f, + 0x40, 0x40, 0x40, 0x41, 0x41, 0x41, 0x42, 0x42, + 0x43, 0x43, 0x43, 0x44, 0x44, 0x45, 0x45, 0x45, + 0x46, 0x46, 0x47, 0x47, 0x47, 0x48, 0x48, 0x49, + 0x49, 0x49, 0x4a, 0x4a, 0x4b, 0x4b, 0x4b, 0x4c, + 0x4c, 0x4c, 0x4d, 0x4d, 0x4e, 0x4e, 0x4e, 0x4f, + 0x4f, 0x50, 0x50, 0x50, 0x51, 0x51, 0x51, 0x52, + 0x52, 0x53, 0x53, 0x53, 0x54, 0x54, 0x54, 0x55, + 0x55, 0x56, 0x56, 0x56, 0x57, 0x57, 0x57, 0x58, + 0x58, 0x59, 0x59, 0x59, 0x5a, 0x5a, 0x5a, 0x5b, + 0x5b, 0x5b, 0x5c, 0x5c, 0x5d, 0x5d, 0x5d, 0x5e, + 0x5e, 0x5e, 0x5f, 0x5f, 0x60, 0x60, 0x60, 0x61, + 0x61, 0x61, 0x62, 0x62, 0x62, 0x63, 0x63, 0x63, + 0x64, 0x64, 0x65, 0x65, 0x65, 0x66, 0x66, 0x66, + 0x67, 0x67, 0x67, 0x68, 0x68, 0x68, 0x69, 0x69, +}; +#endif + + +mp_size_t +#if __STDC__ +mpn_sqrtrem (mp_ptr root_ptr, mp_ptr rem_ptr, mp_srcptr op_ptr, mp_size_t op_size) +#else +mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) + mp_ptr root_ptr; + mp_ptr rem_ptr; + mp_srcptr op_ptr; + mp_size_t op_size; +#endif +{ + /* R (root result) */ + mp_ptr rp; /* Pointer to least significant word */ + mp_size_t rsize; /* The size in words */ + + /* T (OP shifted to the left a.k.a. normalized) */ + mp_ptr tp; /* Pointer to least significant word */ + mp_size_t tsize; /* The size in words */ + mp_ptr t_end_ptr; /* Pointer right beyond most sign. word */ + mp_limb_t t_high0, t_high1; /* The two most significant words */ + + /* TT (temporary for numerator/remainder) */ + mp_ptr ttp; /* Pointer to least significant word */ + + /* X (temporary for quotient in main loop) */ + mp_ptr xp; /* Pointer to least significant word */ + mp_size_t xsize; /* The size in words */ + + unsigned cnt; + mp_limb_t initial_approx; /* Initially made approximation */ + mp_size_t tsizes[BITS_PER_MP_LIMB]; /* Successive calculation precisions */ + mp_size_t tmp; + mp_size_t i; + + mp_limb_t cy_limb; + TMP_DECL (marker); + + /* If OP is zero, both results are zero. */ + if (op_size == 0) + return 0; + + count_leading_zeros (cnt, op_ptr[op_size - 1]); + tsize = op_size; + if ((tsize & 1) != 0) + { + cnt += BITS_PER_MP_LIMB; + tsize++; + } + + rsize = tsize / 2; + rp = root_ptr; + + TMP_MARK (marker); + + /* Shift OP an even number of bits into T, such that either the most or + the second most significant bit is set, and such that the number of + words in T becomes even. This way, the number of words in R=sqrt(OP) + is exactly half as many as in OP, and the most significant bit of R + is set. + + Also, the initial approximation is simplified by this up-shifted OP. 
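In formula form, the iteration of step 3 is Newton's method on f(r) = r^2 - T:

  r_{k+1} = \left\lfloor \bigl( r_k + \lfloor T / r_k \rfloor \bigr) / 2 \right\rfloor.

Convergence is quadratic (each step roughly doubles the number of correct leading bits), which is why the tsizes[] schedule declared above is built by repeated halving: the early, inaccurate iterations run at low precision, and only the final one pays for a full-size division.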
+ + Finally, the Newtonian iteration which is the main part of this + program performs division by R. The fast division routine expects + the divisor to be "normalized" in exactly the sense of having the + most significant bit set. */ + + tp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB); + + if ((cnt & ~1) % BITS_PER_MP_LIMB != 0) + t_high0 = mpn_lshift (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size, + (cnt & ~1) % BITS_PER_MP_LIMB); + else + MPN_COPY (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size); + + if (cnt >= BITS_PER_MP_LIMB) + tp[0] = 0; + + t_high0 = tp[tsize - 1]; + t_high1 = tp[tsize - 2]; /* Never stray. TSIZE is >= 2. */ + +/* Is there a fast sqrt instruction defined for this machine? */ +#ifdef SQRT + { + initial_approx = SQRT (t_high0 * 2.0 + * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)) + + t_high1); + /* If t_high0,,t_high1 is big, the result in INITIAL_APPROX might have + become incorrect due to overflow in the conversion from double to + mp_limb_t above. It will typically be zero in that case, but might be + a small number on some machines. The most significant bit of + INITIAL_APPROX should be set, so that bit is a good overflow + indication. */ + if ((mp_limb_signed_t) initial_approx >= 0) + initial_approx = ~(mp_limb_t)0; + } +#else + /* Get a 9 bit approximation from the tables. The tables expect to + be indexed with the 8 high bits right below the highest bit. + Also, the highest result bit is not returned by the tables, and + must be or:ed into the result. The scheme gives 9 bits of start + approximation with just 256-entry 8 bit tables. */ + + if ((cnt & 1) == 0) + { + /* The most sign bit of t_high0 is set. */ + initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 1); + initial_approx &= 0xff; + initial_approx = even_approx_tab[initial_approx]; + } + else + { + /* The most significant bit of T_HIGH0 is unset, + the second most significant is set. */ + initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 2); + initial_approx &= 0xff; + initial_approx = odd_approx_tab[initial_approx]; + } + initial_approx |= 0x100; + initial_approx <<= BITS_PER_MP_LIMB - 8 - 1; + + /* Perform small precision Newtonian iterations to get a full word + approximation. For small operands, these iteration will make the + entire job. */ + if (t_high0 == ~(mp_limb_t)0) + initial_approx = t_high0; + else + { + mp_limb_t quot; + + if (t_high0 >= initial_approx) + initial_approx = t_high0 + 1; + + /* First get about 18 bits with pure C arithmetics. */ + quot = t_high0 / (initial_approx >> BITS_PER_MP_LIMB/2) << BITS_PER_MP_LIMB/2; + initial_approx = (initial_approx + quot) / 2; + initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1); + + /* Now get a full word by one (or for > 36 bit machines) several + iterations. */ + for (i = 16; i < BITS_PER_MP_LIMB; i <<= 1) + { + mp_limb_t ignored_remainder; + + udiv_qrnnd (quot, ignored_remainder, + t_high0, t_high1, initial_approx); + initial_approx = (initial_approx + quot) / 2; + initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1); + } + } +#endif + + rp[0] = initial_approx; + rsize = 1; + +#ifdef DEBUG + printf ("\n\nT = "); + mpn_dump (tp, tsize); +#endif + + if (tsize > 2) + { + /* Determine the successive precisions to use in the iteration. We + minimize the precisions, beginning with the highest (i.e. last + iteration) to the lowest (i.e. first iteration). 
+
+      xp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
+      ttp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
+
+      t_end_ptr = tp + tsize;
+
+      tmp = tsize / 2;
+      for (i = 0;; i++)
+        {
+          tsize = (tmp + 1) / 2;
+          if (tmp == tsize)
+            break;
+          tsizes[i] = tsize + tmp;
+          tmp = tsize;
+        }
+
+      /* Example: for a 10-limb T this computes TSIZES[] = {8, 5, 3};
+         the main loop below then runs at precisions 3, 5, and finally
+         8 limbs, ending with RSIZE == 5.  */
+
+      /* Main Newton iteration loop.  For big arguments, most of the
+         time is spent here.  */
+
+      /* It is possible to do a great optimization here.  The successive
+         divisors in the mpn_divmod call below have more and more leading
+         words in common with their predecessors.  Therefore the beginning
+         of each division repeats work already done by the previous
+         division.  If we could guarantee that the leading words of two
+         consecutive divisors are the same (i.e. in this case, a later
+         divisor has just more digits at the end) it would be a simple
+         matter of just using the old remainder of the last division in
+         a subsequent division, to take care of this optimization.  This
+         idea would surely make a difference even for small arguments.  */
+
+      /* Loop invariants:
+
+         R <= shiftdown_to_same_size(floor(sqrt(OP))) < R + 1.
+         X - 1 < shiftdown_to_same_size(floor(sqrt(OP))) <= X.
+         R <= shiftdown_to_same_size(X).  */
+
+      while (--i >= 0)
+        {
+          mp_limb_t cy;
+#ifdef DEBUG
+          mp_limb_t old_least_sign_r = rp[0];
+          mp_size_t old_rsize = rsize;
+
+          printf ("R = ");
+          mpn_dump (rp, rsize);
+#endif
+          tsize = tsizes[i];
+
+          /* Need to copy the numerator into temporary space, as
+             mpn_divmod overwrites its numerator argument with the
+             remainder (which we currently ignore).  */
+          MPN_COPY (ttp, t_end_ptr - tsize, tsize);
+          cy = mpn_divmod (xp, ttp, tsize, rp, rsize);
+          xsize = tsize - rsize;
+
+#ifdef DEBUG
+          printf ("X =%d ", cy);
+          mpn_dump (xp, xsize);
+#endif
+
+          /* Add X and R with the most significant limbs aligned,
+             temporarily ignoring at least one limb at the low end of X.  */
+          tmp = xsize - rsize;
+          cy += mpn_add_n (xp + tmp, rp, xp + tmp, rsize);
+
+          /* If T begins with more than 2 x BITS_PER_MP_LIMB of ones, we get
+             intermediate roots that'd need an extra bit.  We don't want to
+             handle that since it would make the subsequent divisor
+             non-normalized, so round such roots down to be only ones in the
+             current precision.  */
+          if (cy == 2)
+            {
+              mp_size_t j;
+              for (j = xsize; j >= 0; j--)
+                xp[j] = ~(mp_limb_t)0;
+            }
+
+          /* Divide X by 2 and put the result in R.  This is the new
+             approximation.  Shift in the carry from the addition.  */
+          mpn_rshift (rp, xp, xsize, 1);
+          rp[xsize - 1] |= ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1));
+          rsize = xsize;
+#ifdef DEBUG
+          if (old_least_sign_r != rp[rsize - old_rsize])
+            printf (">>>>>>>> %d: %0*lX, %0*lX <<<<<<<<\n",
+                    i, 2 * BYTES_PER_MP_LIMB, old_least_sign_r,
+                    2 * BYTES_PER_MP_LIMB, rp[rsize - old_rsize]);
+#endif
+        }
+    }
+
+#ifdef DEBUG
+  printf ("(final) R = ");
+  mpn_dump (rp, rsize);
+#endif
+
+  /* We computed the square root of OP * 2**(2*floor(cnt/2)), so R is
+     a factor 2**floor(cnt/2) too large.  Shift it down here to fix that.  */
+  if (cnt / 2 != 0)
+    {
+      mpn_rshift (rp, rp, rsize, cnt/2);
+      rsize -= rp[rsize - 1] == 0;
+    }
+
+  /* Calculate the remainder.  */
+  mpn_mul_n (tp, rp, rp, rsize);
+  tsize = rsize + rsize;
+  tsize -= tp[tsize - 1] == 0;
+  if (op_size < tsize
+      || (op_size == tsize && mpn_cmp (op_ptr, tp, op_size) < 0))
+    {
+      /* R is too large.  Decrement it, and update TP to the new square
+         (R-1)^2 = R^2 - 2*R + 1.  These operations can't overflow.  */
+      cy_limb  = mpn_sub_n (tp, tp, rp, rsize);
+      cy_limb += mpn_sub_n (tp, tp, rp, rsize);
+      mpn_sub_1 (tp + rsize, tp + rsize, tsize - rsize, cy_limb);
+      mpn_add_1 (tp, tp, tsize, (mp_limb_t) 1);
+
+      mpn_sub_1 (rp, rp, rsize, (mp_limb_t) 1);
+
+#ifdef DEBUG
+      printf ("(adjusted) R = ");
+      mpn_dump (rp, rsize);
+#endif
+    }
+
+  if (rem_ptr != NULL)
+    {
+      cy_limb = mpn_sub (rem_ptr, op_ptr, op_size, tp, tsize);
+      MPN_NORMALIZE (rem_ptr, op_size);
+      TMP_FREE (marker);
+      return op_size;
+    }
+  else
+    {
+      int res;
+      res = op_size != tsize || mpn_cmp (op_ptr, tp, op_size);
+      TMP_FREE (marker);
+      return res;
+    }
+}
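[Editorial note] A minimal usage sketch, not part of the imported source, illustrating the calling convention visible above: the root buffer needs half as many limbs as the operand (rounded up), the remainder buffer needs op_size limbs, and the return value is the normalized size of the remainder, zero when the operand is a perfect square. sqrt_example is a name invented for this note.

#include "gmp.h"

void
sqrt_example (void)
{
  mp_limb_t op[2] = { 42, 1 };          /* op = 2^BITS_PER_MP_LIMB + 42 */
  mp_limb_t root[1];
  mp_limb_t rem[2];
  mp_size_t rem_size;

  rem_size = mpn_sqrtrem (root, rem, op, (mp_size_t) 2);
  /* root[0] == floor(sqrt(op)); rem[0..rem_size-1] == op - root[0]^2,
     and rem_size == 0 would mean op is a perfect square.  */
}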
diff --git a/contrib/libgmp/mpn/generic/sub_n.c b/contrib/libgmp/mpn/generic/sub_n.c
new file mode 100644
index 0000000..9d4b216
--- /dev/null
+++ b/contrib/libgmp/mpn/generic/sub_n.c
@@ -0,0 +1,62 @@
+/* mpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
+
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+#if __STDC__
+mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+#else
+mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     register mp_srcptr s2_ptr;
+     mp_size_t size;
+#endif
+{
+  register mp_limb_t x, y, cy;
+  register mp_size_t j;
+
+  /* The loop counter and index J go from -SIZE to -1.  This way
+     the loop becomes faster.  */
+  j = -size;
+
+  /* Offset the base pointers to compensate for the negative indices.  */
+  s1_ptr -= j;
+  s2_ptr -= j;
+  res_ptr -= j;
+
+  cy = 0;
+  do
+    {
+      y = s2_ptr[j];
+      x = s1_ptr[j];
+      y += cy;                  /* add previous carry to subtrahend */
+      cy = (y < cy);            /* carry out from that addition */
+      y = x - y;                /* main subtract */
+      cy = (y > x) + cy;        /* carry out from the subtract; combine */
+      res_ptr[j] = y;
+    }
+  while (++j != 0);
+
+  return cy;
+}
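[Editorial note] The branch-free borrow propagation in the loop above is the whole point of this file, and is worth checking in isolation. The sketch below is not part of the imported source; it uses unsigned long as a stand-in for mp_limb_t. In unsigned arithmetic, a difference larger than the minuend proves the subtraction wrapped around, which is exactly the (y > x) test above.

#include <assert.h>

/* One limb position of mpn_sub_n's borrow chain.  */
static void
check_borrow (unsigned long x, unsigned long y, unsigned long cy_in)
{
  unsigned long s, d, cy;

  s = y + cy_in;                /* fold incoming borrow into subtrahend */
  cy = (s < cy_in);             /* wraparound here already forces a borrow */
  d = x - s;
  cy += (d > x);                /* wraparound in the subtract itself */
  assert (cy <= 1);             /* the two wraparounds are mutually exclusive */
}

int
main (void)
{
  check_borrow (0UL, ~0UL, 1UL);        /* worst case: y + cy_in wraps to 0 */
  check_borrow (0UL, 1UL, 0UL);         /* a plain borrow */
  check_borrow (5UL, 3UL, 1UL);         /* no borrow */
  return 0;
}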
diff --git a/contrib/libgmp/mpn/generic/submul_1.c b/contrib/libgmp/mpn/generic/submul_1.c
new file mode 100644
index 0000000..b144283
--- /dev/null
+++ b/contrib/libgmp/mpn/generic/submul_1.c
@@ -0,0 +1,65 @@
+/* mpn_submul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
+   by S2_LIMB, subtract the S1_SIZE least significant limbs of the product
+   from the limb vector pointed to by RES_PTR.  Return the most significant
+   limb of the product, adjusted for carry-out from the subtraction.
+
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     mp_size_t s1_size;
+     register mp_limb_t s2_limb;
+{
+  register mp_limb_t cy_limb;
+  register mp_size_t j;
+  register mp_limb_t prod_high, prod_low;
+  register mp_limb_t x;
+
+  /* The loop counter and index J go from -SIZE to -1.  This way
+     the loop becomes faster.  */
+  j = -s1_size;
+
+  /* Offset the base pointers to compensate for the negative indices.  */
+  res_ptr -= j;
+  s1_ptr -= j;
+
+  cy_limb = 0;
+  do
+    {
+      umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
+
+      prod_low += cy_limb;
+      cy_limb = (prod_low < cy_limb) + prod_high;  /* carry from add, plus high word */
+
+      x = res_ptr[j];
+      prod_low = x - prod_low;
+      cy_limb += (prod_low > x);                   /* borrow from the subtract */
+      res_ptr[j] = prod_low;
+    }
+  while (++j != 0);
+
+  return cy_limb;
+}
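[Editorial note] A short usage sketch, not part of the imported source. mpn_submul_1 is the building block of schoolbook division and multiplication loops; here it subtracts 3 * {1, 2} from {10, 20} in place. submul_example is a name invented for this note.

#include "gmp.h"

void
submul_example (void)
{
  mp_limb_t acc[2] = { 10, 20 };        /* least significant limb first */
  mp_limb_t v[2] = { 1, 2 };
  mp_limb_t borrow;

  borrow = mpn_submul_1 (acc, v, (mp_size_t) 2, (mp_limb_t) 3);
  /* acc is now { 7, 14 }, i.e. {10,20} - 3*{1,2}, and borrow == 0
     because nothing was carried out of the most significant limb.  */
}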
diff --git a/contrib/libgmp/mpn/generic/udiv_w_sdiv.c b/contrib/libgmp/mpn/generic/udiv_w_sdiv.c
new file mode 100644
index 0000000..d9e71b7
--- /dev/null
+++ b/contrib/libgmp/mpn/generic/udiv_w_sdiv.c
@@ -0,0 +1,125 @@
+/* mpn_udiv_w_sdiv -- implement udiv_qrnnd on machines with only signed
+   division.
+
+   Contributed by Peter L. Montgomery.
+
+   (The 2^30/2^31/2^32 figures in the comments below assume 32-bit
+   limbs; the code itself is written in terms of BITS_PER_MP_LIMB.)
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_udiv_w_sdiv (rp, a1, a0, d)
+     mp_limb_t *rp, a1, a0, d;
+{
+  mp_limb_t q, r;
+  mp_limb_t c0, c1, b1;
+
+  if ((mp_limb_signed_t) d >= 0)
+    {
+      if (a1 < d - a1 - (a0 >> (BITS_PER_MP_LIMB - 1)))
+        {
+          /* dividend, divisor, and quotient are nonnegative */
+          sdiv_qrnnd (q, r, a1, a0, d);
+        }
+      else
+        {
+          /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d */
+          sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (BITS_PER_MP_LIMB - 1));
+          /* Divide (c1*2^32 + c0) by d */
+          sdiv_qrnnd (q, r, c1, c0, d);
+          /* Add 2^31 to quotient */
+          q += (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
+        }
+    }
+  else
+    {
+      b1 = d >> 1;                      /* d/2, between 2^30 and 2^31 - 1 */
+      c1 = a1 >> 1;                     /* A/2 */
+      c0 = (a1 << (BITS_PER_MP_LIMB - 1)) + (a0 >> 1);
+
+      if (a1 < b1)                      /* A < 2^32*b1, so A/2 < 2^31*b1 */
+        {
+          sdiv_qrnnd (q, r, c1, c0, b1);        /* (A/2) / (d/2) */
+
+          r = 2*r + (a0 & 1);           /* Remainder from A/(2*b1) */
+          if ((d & 1) != 0)
+            {
+              if (r >= q)
+                r = r - q;
+              else if (q - r <= d)
+                {
+                  r = r - q + d;
+                  q--;
+                }
+              else
+                {
+                  r = r - q + 2*d;
+                  q -= 2;
+                }
+            }
+        }
+      else if (c1 < b1)                 /* So 2^31 <= (A/2)/b1 < 2^32 */
+        {
+          c1 = (b1 - 1) - c1;
+          c0 = ~c0;                     /* logical NOT */
+
+          sdiv_qrnnd (q, r, c1, c0, b1);        /* (A/2) / (d/2) */
+
+          q = ~q;                       /* (A/2)/b1 */
+          r = (b1 - 1) - r;
+
+          r = 2*r + (a0 & 1);           /* A/(2*b1) */
+
+          if ((d & 1) != 0)
+            {
+              if (r >= q)
+                r = r - q;
+              else if (q - r <= d)
+                {
+                  r = r - q + d;
+                  q--;
+                }
+              else
+                {
+                  r = r - q + 2*d;
+                  q -= 2;
+                }
+            }
+        }
+      else                              /* Implies c1 = b1 */
+        {                               /* Hence a1 = d - 1 = 2*b1 - 1 */
+          if (a0 >= -d)
+            {
+              q = -1;
+              r = a0 + d;
+            }
+          else
+            {
+              q = -2;
+              r = a0 + 2*d;
+            }
+        }
+    }
+
+  *rp = r;
+  return q;
+}
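[Editorial note] With this many cases, mpn_udiv_w_sdiv is a natural candidate for checking against a wider type. The sketch below is not part of the imported source and assumes 32-bit limbs so that unsigned long long can serve as the reference; check_udiv and the limb typedef are inventions of this note.

#include <assert.h>

typedef unsigned int limb;      /* assumed to match a 32-bit mp_limb_t */

limb mpn_udiv_w_sdiv ();        /* old-style declaration, as in the file above */

static void
check_udiv (limb a1, limb a0, limb d)
{
  unsigned long long a = ((unsigned long long) a1 << 32) | a0;
  limb q, r;

  assert (d != 0 && a1 < d);    /* the udiv_qrnnd precondition */
  q = mpn_udiv_w_sdiv (&r, a1, a0, d);
  assert (q == (limb) (a / d)); /* a1 < d guarantees the quotient fits */
  assert (r == (limb) (a % d));
}

int
main (void)
{
  check_udiv (0, 100, 7);                           /* small positive divisor */
  check_udiv (1, 0, 3);                             /* dividend spans two limbs */
  check_udiv (0x7fffffff, 0xffffffff, 0x80000001);  /* "negative" divisor path */
  return 0;
}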