From 44ccca75bef19e5ba824cda0274412e89e17d56a Mon Sep 17 00:00:00 2001
From: czurnieden <czurnieden@gmx.de>
Date: Fri, 4 May 2018 00:01:45 +0200
Subject: [PATCH] the lost files from the last commit

---
 bn_mp_get_bit.c                      |  35 +++
 bn_mp_kronecker.c                    | 139 +++++++++++
 bn_mp_mul_si.c                       |  48 ++++
 bn_mp_prime_frobenius_underwood.c    | 183 ++++++++++++++
 bn_mp_prime_strong_lucas_selfridge.c | 358 +++++++++++++++++++++++++++
 5 files changed, 763 insertions(+)
 create mode 100644 bn_mp_get_bit.c
 create mode 100644 bn_mp_kronecker.c
 create mode 100644 bn_mp_mul_si.c
 create mode 100644 bn_mp_prime_frobenius_underwood.c
 create mode 100644 bn_mp_prime_strong_lucas_selfridge.c

diff --git a/bn_mp_get_bit.c b/bn_mp_get_bit.c
new file mode 100644
index 0000000..974246b
--- /dev/null
+++ b/bn_mp_get_bit.c
@@ -0,0 +1,35 @@
+#include "tommath_private.h"
+#ifdef BN_MP_GET_BIT_C
+
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ */
+
+/* Checks the bit at position b and returns MP_YES
+   if the bit is 1, MP_NO if it is 0 and MP_VAL
+   in case of error */
+int mp_get_bit(const mp_int *a, int b)
+{
+   int limb;
+   mp_digit bit, isset;
+
+   if (b < 0) {
+      return MP_VAL;
+   }
+
+   limb = b / DIGIT_BIT;
+   bit = (mp_digit)1 << ((mp_digit)b % DIGIT_BIT);
+   isset = a->dp[limb] & bit;
+   return (isset != 0) ? MP_YES : MP_NO;
+}
+
+#endif
diff --git a/bn_mp_kronecker.c b/bn_mp_kronecker.c
new file mode 100644
index 0000000..656170e
--- /dev/null
+++ b/bn_mp_kronecker.c
@@ -0,0 +1,139 @@
+#include "tommath_private.h"
+#ifdef BN_MP_KRONECKER_C
+
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ */
+
+/*
+   Kronecker symbol (a|p)
+   Straightforward implementation of algorithm 1.4.10 in
+   Henri Cohen: "A Course in Computational Algebraic Number Theory"
+
+   @book{cohen2013course,
+     title={A course in computational algebraic number theory},
+     author={Cohen, Henri},
+     volume={138},
+     year={2013},
+     publisher={Springer Science \& Business Media}
+    }
+ */
+int mp_kronecker(const mp_int *a, const mp_int *p, int *c)
+{
+   mp_int a1, p1, r;
+
+   int e = MP_OKAY;
+   int v, k;
+
+   const int table[8] = {0, 1, 0, -1, 0, -1, 0, 1};
+
+   if (mp_iszero(p)) {
+      if (a->used == 1 && a->dp[0] == 1) {
+         *c = 1;
+         return e;
+      } else {
+         *c = 0;
+         return e;
+      }
+   }
+
+   if (mp_iseven(a) && mp_iseven(p)) {
+      *c = 0;
+      return e;
+   }
+
+   if ((e = mp_init_copy(&a1, a)) != MP_OKAY) {
+      return e;
+   }
+   if ((e = mp_init_copy(&p1, p)) != MP_OKAY) {
+      goto LBL_KRON_0;
+   }
+
+   v = mp_cnt_lsb(&p1);
+   if ((e = mp_div_2d(&p1, v, &p1, NULL)) != MP_OKAY) {
+      goto LBL_KRON_1;
+   }
+
+   if ((v & 0x1) == 0) {
+      k = 1;
+   } else {
+      k = table[a->dp[0] & 7];
+   }
+
+   if (p1.sign == MP_NEG) {
+      p1.sign = MP_ZPOS;
+      if (a1.sign == MP_NEG) {
+         k = -k;
+      }
+   }
+
+   if ((e = mp_init(&r)) != MP_OKAY) {
+      goto LBL_KRON_1;
+   }
+
+   for (;;) {
+      if (mp_iszero(&a1)) {
+         if (mp_cmp_d(&p1, 1) == MP_EQ) {
+            *c = k;
+            goto LBL_KRON;
+         } else {
+            *c = 0;
+            goto LBL_KRON;
+         }
+      }
+
+      v = mp_cnt_lsb(&a1);
+      if ((e = mp_div_2d(&a1, v, &a1, NULL)) != MP_OKAY) {
+         goto LBL_KRON;
+      }
+
+      if ((v & 0x1) == 1) {
+         k = k * table[p1.dp[0] & 7];
+      }
+
+      if (a1.sign == MP_NEG) {
+         // compute k = (-1)^((a1)*(p1-1)/4) * k
+         // a1.dp[0] + 1 cannot overflow because the MSB
+         // of the type mp_digit is not set by definition
+         if ((a1.dp[0] + 1) & p1.dp[0] & 2u) {
+            k = -k;
+         }
+      } else {
+         // compute k = (-1)^((a1-1)*(p1-1)/4) * k
+         if (a1.dp[0] & p1.dp[0] & 2u) {
+            k = -k;
+         }
+      }
+
+      if ((e = mp_copy(&a1,&r)) != MP_OKAY) {
+         goto LBL_KRON;
+      }
+      r.sign = MP_ZPOS;
+      if ((e = mp_mod(&p1, &r, &a1)) != MP_OKAY) {
+         goto LBL_KRON;
+      }
+      if ((e = mp_copy(&r, &p1)) != MP_OKAY) {
+         goto LBL_KRON;
+      }
+   }
+
+LBL_KRON:
+   mp_clear(&r);
+LBL_KRON_0:
+   mp_clear(&a1);
+LBL_KRON_1:
+   mp_clear(&p1);
+   return e;
+}
+
+
+#endif
diff --git a/bn_mp_mul_si.c b/bn_mp_mul_si.c
new file mode 100644
index 0000000..026cd24
--- /dev/null
+++ b/bn_mp_mul_si.c
@@ -0,0 +1,48 @@
+#include "tommath_private.h"
+#ifdef BN_MP_MUL_SI_C
+
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ */
+
+// multiply bigint a with int d and put the result in c
+// Like mp_mul_d() but with a signed long as the small input
+int mp_mul_si(const mp_int *a, long d, mp_int *c)
+{
+   mp_int t;
+   int err;
+
+   if ((err = mp_init(&t)) != MP_OKAY) {
+      return err;
+   }
+   if (d < 0) {
+      d = -d;
+   }
+   // mp_digit might be smaller than a long, which excludes
+   // the use of mp_mul_d() here.
+   if ((err = mp_set_int(&t, (unsigned long) d)) != MP_OKAY) {
+      goto LBL_MPMULSI_ERR;
+   }
+   if ((err = mp_mul(a, &t, c)) != MP_OKAY) {
+      goto LBL_MPMULSI_ERR;
+   }
+   if (d < 0) {
+      c->sign = (a->sign == MP_NEG) ? MP_ZPOS: MP_NEG;
+   }
+LBL_MPMULSI_ERR:
+   mp_clear(&t);
+   return err;
+}
+
+
+
+#endif
diff --git a/bn_mp_prime_frobenius_underwood.c b/bn_mp_prime_frobenius_underwood.c
new file mode 100644
index 0000000..d16ff98
--- /dev/null
+++ b/bn_mp_prime_frobenius_underwood.c
@@ -0,0 +1,183 @@
+#include "tommath_private.h"
+#ifdef BN_MP_PRIME_FROBENIUS_UNDERWOOD_C
+
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ */
+
+
+#ifdef MP_8BIT
+// floor of positive solution of
+// (2^16)-1 = (a+4)*(2*a+5)
+// TODO: that is too small, would have to use a bigint for a instead
+// #define LTM_FROBENIUS_UNDERWOOD_A 177
+#error "Frobenius test not usable with MP_8BIT"
+#endif
+// floor of positive solution of
+// (2^31)-1 = (a+4)*(2*a+5)
+// TODO: that might be too small
+#define LTM_FROBENIUS_UNDERWOOD_A 32764
+int mp_prime_frobenius_underwood(const mp_int *N, int *result)
+{
+   mp_int T1z,T2z,Np1z,sz,tz;
+
+   int a, ap2, length, i, j, isset;
+   int e = MP_OKAY;
+
+   *result = MP_NO;
+
+   if ((e = mp_init_multi(&T1z,&T2z,&Np1z,&sz,&tz, NULL)) != MP_OKAY) {
+      goto LBL_FU_ERR;
+   }
+
+   for (a = 0; a < LTM_FROBENIUS_UNDERWOOD_A; a++) {
+      //TODO: That's ugly! No, really, it is!
+      if (a==2||a==4||a==7||a==8||a==10||a==14||a==18||a==23||a==26||a==28) {
+         continue;
+      }
+      // (32764^2 - 4) < 2^31, no bigint for >MP_8BIT needed)
+      if ((e = mp_set_int(&T1z,(unsigned long)a)) != MP_OKAY) {
+         goto LBL_FU_ERR;
+      }
+
+      if ((e = mp_sqr(&T1z,&T1z)) != MP_OKAY) {
+         goto LBL_FU_ERR;
+      }
+
+      if ((e = mp_sub_d(&T1z,4,&T1z)) != MP_OKAY) {
+         goto LBL_FU_ERR;
+      }
+
+      if ((e = mp_kronecker(&T1z, N, &j)) != MP_OKAY) {
+         goto LBL_FU_ERR;
+      }
+
+      if (j == -1) {
+         break;
+      }
+
+      if (j == 0) {
+         // composite
+         goto LBL_FU_ERR;
+      }
+   }
+   if (a >= LTM_FROBENIUS_UNDERWOOD_A) {
+      e = MP_VAL;
+      goto LBL_FU_ERR;
+   }
+   // Composite if N and (a+4)*(2*a+5) are not coprime
+   if ((e = mp_set_int(&T1z, (unsigned long)((a+4)*(2*a+5)))) != MP_OKAY) {
+      goto LBL_FU_ERR;
+   }
+
+   if ((e = mp_gcd(N,&T1z,&T1z)) != MP_OKAY) {
+      goto LBL_FU_ERR;
+   }
+
+   if (!(T1z.used == 1 && T1z.dp[0] == 1u)) {
+      goto LBL_FU_ERR;
+   }
+
+   ap2 = a + 2;
+   if ((e = mp_add_d(N,1u,&Np1z)) != MP_OKAY) {
+      goto LBL_FU_ERR;
+   }
+
+   mp_set(&sz,1u);
+   mp_set(&tz,2u);
+   length = mp_count_bits(&Np1z);
+
+   for (i = length - 2; i >= 0; i--) {
+      /*
+         temp = (sz*(a*sz+2*tz))%N;
+         tz   = ((tz-sz)*(tz+sz))%N;
+         sz   = temp;
+       */
+      if ((e = mp_mul_2(&tz,&T2z)) != MP_OKAY) {
+         goto LBL_FU_ERR;
+      }
+      // TODO: is this small saving worth the branch?
+      if (a != 0) {
+         if ((e = mp_mul_d(&sz,(mp_digit)a,&T1z)) != MP_OKAY) {
+            goto LBL_FU_ERR;
+         }
+         if ((e = mp_add(&T1z,&T2z,&T2z)) != MP_OKAY) {
+            goto LBL_FU_ERR;
+         }
+      }
+      if ((e = mp_mul(&T2z, &sz, &T1z)) != MP_OKAY) {
+         goto LBL_FU_ERR;
+      }
+      if ((e = mp_sub(&tz, &sz, &T2z)) != MP_OKAY) {
+         goto LBL_FU_ERR;
+      }
+      if ((e = mp_add(&sz, &tz, &sz)) != MP_OKAY) {
+         goto LBL_FU_ERR;
+      }
+      if ((e = mp_mul(&sz, &T2z, &tz)) != MP_OKAY) {
+         goto LBL_FU_ERR;
+      }
+      if ((e = mp_mod(&tz, N, &tz)) != MP_OKAY) {
+         goto LBL_FU_ERR;
+      }
+      if ((e = mp_mod(&T1z, N, &sz)) != MP_OKAY) {
+         goto LBL_FU_ERR;
+      }
+      if ((isset = mp_get_bit(&Np1z,i)) == MP_VAL) {
+         e = isset;
+         goto LBL_FU_ERR;
+      }
+      if (isset == MP_YES) {
+         /*
+             temp = (a+2) * sz + tz
+             tz   = 2 * tz - sz
+             sz   = temp
+          */
+         if (a == 0) {
+            if ((e = mp_mul_2(&sz,&T1z)) != MP_OKAY) {
+               goto LBL_FU_ERR;
+            }
+         } else {
+            if ((e = mp_mul_d(&sz, (mp_digit) ap2, &T1z)) != MP_OKAY) {
+               goto LBL_FU_ERR;
+            }
+         }
+         if ((e = mp_add(&T1z, &tz, &T1z)) != MP_OKAY) {
+            goto LBL_FU_ERR;
+         }
+         if ((e = mp_mul_2(&tz, &T2z)) != MP_OKAY) {
+            goto LBL_FU_ERR;
+         }
+         if ((e = mp_sub(&T2z, &sz, &tz)) != MP_OKAY) {
+            goto LBL_FU_ERR;
+         }
+         mp_exch(&sz,&T1z);
+      }
+   }
+
+   if ((e = mp_set_int(&T1z, (unsigned long)(2 * a + 5))) != MP_OKAY) {
+      goto LBL_FU_ERR;
+   }
+   if ((e = mp_mod(&T1z,N,&T1z)) != MP_OKAY) {
+      goto LBL_FU_ERR;
+   }
+   if (mp_iszero(&sz) && (mp_cmp(&tz, &T1z) == MP_EQ)) {
+      *result = MP_YES;
+      goto LBL_FU_ERR;
+   }
+
+LBL_FU_ERR:
+   mp_clear_multi(&T1z,&T2z,&Np1z,&sz,&tz, NULL);
+   return e;
+}
+
+#endif
diff --git a/bn_mp_prime_strong_lucas_selfridge.c b/bn_mp_prime_strong_lucas_selfridge.c
new file mode 100644
index 0000000..f79419f
--- /dev/null
+++ b/bn_mp_prime_strong_lucas_selfridge.c
@@ -0,0 +1,358 @@
+#include "tommath_private.h"
+#ifdef BN_MP_PRIME_STRONG_LUCAS_SELFRIDGE_C
+
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ */
+
+#ifdef MP_8BIT
+#error "BPSW test not for MP_8BIT yet"
+#endif
+/*
+    Strong Lucas-Selfridge test.
+    returns MP_YES if it is a strong L-S prime, MP_NO if it is composite
+
+    Code ported from  Thomas Ray Nicely's implementation of the BPSW test
+    at http://www.trnicely.net/misc/bpsw.html
+
+    Freeware copyright (C) 2016 Thomas R. Nicely <http://www.trnicely.net>.
+    Released into the public domain by the author, who disclaims any legal
+    liability arising from its use
+
+    The multi-line comments are made by Thomas R. Nicely and are copied verbatim.
+    Single-line comments are by the code-portist.
+
+    (If that name sounds familiar, he is the guy who found the fdiv bug in the
+     Pentium (P5x, I think) Intel processor)
+*/
+int mp_prime_strong_lucas_selfridge(const mp_int *a, int *result)
+{
+   // TODO: choose better variable names! "Dz" and "dz"? Really?
+   mp_int Dz, gcd, Np1, dz, Uz, Vz, U2mz, V2mz, Qmz, Q2mz, Qkdz, T1z, T2z, T3z, T4z, Q2kdz;
+   // TODO: Some of them need the full 32 bit, hence the (temporary) exclusion of MP_8BIT
+   int32_t D, Ds, J, sign, P, Q, r, s, u, Nbits;
+   int e = MP_OKAY;
+   int isset;
+
+   *result = MP_NO;
+
+   /*
+   Find the first element D in the sequence {5, -7, 9, -11, 13, ...}
+   such that Jacobi(D,N) = -1 (Selfridge's algorithm). Theory
+   indicates that, if N is not a perfect square, D will "nearly
+   always" be "small." Just in case, an overflow trap for D is
+   included.
+   */
+
+   D = 5;
+   sign = 1;
+
+   if ((e = mp_init_multi(&Dz, &gcd, &Np1, &dz, &Uz, &Vz, &U2mz, &V2mz, &Qmz, &Q2mz, &Qkdz, &T1z, &T2z, &T3z, &T4z, &Q2kdz,
+                          NULL)) != MP_OKAY) {
+      return e;
+   }
+
+   for (;;) {
+      Ds   = sign * D;
+      sign = -sign;
+      if ((e = mp_set_int(&Dz,(unsigned long) D)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      if ((e = mp_gcd(a, &Dz, &gcd)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      /* if 1 < GCD < N then N is composite with factor "D", and
+         Jacobi(D,N) is technically undefined (but often returned
+         as zero). */
+      if ((gcd.used > 1 || gcd.dp[0] > 1)  && mp_cmp(&gcd,a) == MP_LT) {
+         goto LBL_LS_ERR;
+      }
+
+      if ((e = mp_kronecker(&Dz, a, &J)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+
+      if (J < 0) {
+         break;
+      }
+      D += 2;
+
+      if (D > INT_MAX - 2) {
+         e = MP_VAL;
+         goto LBL_LS_ERR;
+      }
+   }
+
+   P = 1;              /* Selfridge's choice */
+   Q = (1 - Ds) / 4;   /* Required so D = P*P - 4*Q */
+
+   /* NOTE: The conditions (a) N does not divide Q, and
+      (b) D is square-free or not a perfect square, are included by
+      some authors; e.g., "Prime numbers and computer methods for
+      factorization," Hans Riesel (2nd ed., 1994, Birkhauser, Boston),
+      p. 130. For this particular application of Lucas sequences,
+      these conditions were found to be immaterial. */
+
+   /* Now calculate N - Jacobi(D,N) = N + 1 (even), and calculate the
+      odd positive integer d and positive integer s for which
+      N + 1 = 2^s*d (similar to the step for N - 1 in Miller's test).
+      The strong Lucas-Selfridge test then returns N as a strong
+      Lucas probable prime (slprp) if any of the following
+      conditions is met: U_d=0, V_d=0, V_2d=0, V_4d=0, V_8d=0,
+      V_16d=0, ..., etc., ending with V_{2^(s-1)*d}=V_{(N+1)/2}=0
+      (all equalities mod N). Thus d is the highest index of U that
+      must be computed (since V_2m is independent of U), compared
+      to U_{N+1} for the standard Lucas-Selfridge test; and no
+      index of V beyond (N+1)/2 is required, just as in the
+      standard Lucas-Selfridge test. However, the quantity Q^d must
+      be computed for use (if necessary) in the latter stages of
+      the test. The result is that the strong Lucas-Selfridge test
+      has a running time only slightly greater (order of 10 %) than
+      that of the standard Lucas-Selfridge test, while producing
+      only (roughly) 30 % as many pseudoprimes (and every strong
+      Lucas pseudoprime is also a standard Lucas pseudoprime). Thus
+      the evidence indicates that the strong Lucas-Selfridge test is
+      more effective than the standard Lucas-Selfridge test, and a
+      Baillie-PSW test based on the strong Lucas-Selfridge test
+      should be more reliable. */
+
+   if ((e = mp_add_d(a,1,&Np1)) != MP_OKAY) {
+      goto LBL_LS_ERR;
+   }
+   s = mp_cnt_lsb(&Np1);
+
+   // this should round towards zero because
+   // Thomas R. Nicely used GMP's mpz_tdiv_q_2exp()
+   // mp_div_2d() does that
+   if ((e = mp_div_2d(&Np1, s, &dz, NULL)) != MP_OKAY) {
+      goto LBL_LS_ERR;
+   }
+
+
+   /* We must now compute U_d and V_d. Since d is odd, the accumulated
+      values U and V are initialized to U_1 and V_1 (if the target
+      index were even, U and V would be initialized instead to U_0=0
+      and V_0=2). The values of U_2m and V_2m are also initialized to
+      U_1 and V_1; the FOR loop calculates in succession U_2 and V_2,
+      U_4 and V_4, U_8 and V_8, etc. If the corresponding bits
+      (1, 2, 3, ...) of t are on (the zero bit having been accounted
+      for in the initialization of U and V), these values are then
+      combined with the previous totals for U and V, using the
+      composition formulas for addition of indices. */
+
+   mp_set(&Uz, 1u);    /* U=U_1 */
+   mp_set(&Vz, (mp_digit)P);    /* V=V_1 */
+   mp_set(&U2mz, 1u);  /* U_1 */
+   mp_set(&V2mz, (mp_digit)P);  /* V_1 */
+
+   if (Q < 0) {
+      Q = -Q;
+      if ((e = mp_set_int(&Qmz, (unsigned long) Q)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      Qmz.sign = MP_NEG;
+      if ((e = mp_set_int(&Q2mz, (unsigned long)(2 * Q))) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      Q2mz.sign = MP_NEG;
+      /* Initializes calculation of Q^d */
+      if ((e = mp_set_int(&Qkdz, (unsigned long) Q)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      Qkdz.sign = MP_NEG;
+      Q = -Q;
+   } else {
+      if ((e = mp_set_int(&Qmz, (unsigned long) Q)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      if ((e = mp_set_int(&Q2mz, (unsigned long)(2 * Q))) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      /* Initializes calculation of Q^d */
+      if ((e = mp_set_int(&Qkdz, (unsigned long) Q)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+   }
+
+   Nbits = mp_count_bits(&dz);
+
+   for (u = 1; u < Nbits; u++) { /* zero bit off, already accounted for */
+      /* Formulas for doubling of indices (carried out mod N). Note that
+       * the indices denoted as "2m" are actually powers of 2, specifically
+       * 2^(ul-1) beginning each loop and 2^ul ending each loop.
+       *
+       * U_2m = U_m*V_m
+       * V_2m = V_m*V_m - 2*Q^m
+       */
+
+      if ((e = mp_mul(&U2mz,&V2mz,&U2mz)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      if ((e = mp_mod(&U2mz,a,&U2mz)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      if ((e = mp_sqr(&V2mz,&V2mz)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      if ((e = mp_sub(&V2mz,&Q2mz,&V2mz)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      if ((e = mp_mod(&V2mz,a,&V2mz)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      /* Must calculate powers of Q for use in V_2m, also for Q^d later */
+      if ((e = mp_sqr(&Qmz,&Qmz)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      /* prevents overflow */ // still necessary without a fixed prealloc'd mem.?
+      if ((e = mp_mod(&Qmz,a,&Qmz)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      if ((e = mp_mul_2(&Qmz,&Q2mz)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+
+      if ((isset = mp_get_bit(&dz,u)) == MP_VAL) {
+         e = isset;
+         goto LBL_LS_ERR;
+      }
+
+      if (isset == MP_YES) {
+         /* Formulas for addition of indices (carried out mod N);
+          *
+          * U_(m+n) = (U_m*V_n + U_n*V_m)/2
+          * V_(m+n) = (V_m*V_n + D*U_m*U_n)/2
+          *
+          * Be careful with division by 2 (mod N)!
+          */
+         if ((e = mp_mul(&U2mz,&Vz,&T1z)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+         if ((e = mp_mul(&Uz,&V2mz,&T2z)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+         if ((e = mp_mul(&V2mz,&Vz,&T3z)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+         if ((e = mp_mul(&U2mz,&Uz,&T4z)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+         if ((e = mp_mul_si(&T4z,(long)Ds,&T4z)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+         if ((e = mp_add(&T1z,&T2z,&Uz)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+         if (mp_isodd(&Uz)) {
+            if ((e = mp_add(&Uz,a,&Uz)) != MP_OKAY) {
+               goto LBL_LS_ERR;
+            }
+         }
+         // This should round towards negative infinity because
+         // Thomas R. Nicely used GMP's mpz_fdiv_q_2exp().
+         // But mp_div_2() does not do so, it is truncating instead.
+         if ((e = mp_div_2(&Uz,&Uz)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+         if (Uz.sign == MP_NEG && mp_isodd(&Uz)) {
+            if ((e = mp_sub_d(&Uz,1,&Uz)) != MP_OKAY) {
+               goto LBL_LS_ERR;
+            }
+         }
+         if ((e = mp_add(&T3z,&T4z,&Vz)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+         if (mp_isodd(&Vz)) {
+            if ((e = mp_add(&Vz,a,&Vz)) != MP_OKAY) {
+               goto LBL_LS_ERR;
+            }
+         }
+         if ((e = mp_div_2(&Vz,&Vz)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+         if (Vz.sign == MP_NEG) {
+            if ((e = mp_sub_d(&Vz,1,&Vz)) != MP_OKAY) {
+               goto LBL_LS_ERR;
+            }
+         }
+         if ((e = mp_mod(&Uz,a,&Uz)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+         if ((e = mp_mod(&Vz,a,&Vz)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+         /* Calculating Q^d for later use */
+         if ((e = mp_mul(&Qkdz,&Qmz,&Qkdz)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+         if ((e = mp_mod(&Qkdz,a,&Qkdz)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+      }
+   }
+
+   /* If U_d or V_d is congruent to 0 mod N, then N is a prime or a
+      strong Lucas pseudoprime. */
+   if (mp_iszero(&Uz) || mp_iszero(&Vz)) {
+      *result = MP_YES;
+      goto LBL_LS_ERR;
+   }
+
+   /* NOTE: Ribenboim ("The new book of prime number records," 3rd ed.,
+      1995/6) omits the condition V0 on p.142, but includes it on
+      p. 130. The condition is NECESSARY; otherwise the test will
+      return false negatives---e.g., the primes 29 and 2000029 will be
+      returned as composite. */
+
+   /* Otherwise, we must compute V_2d, V_4d, V_8d, ..., V_{2^(s-1)*d}
+      by repeated use of the formula V_2m = V_m*V_m - 2*Q^m. If any of
+      these are congruent to 0 mod N, then N is a prime or a strong
+      Lucas pseudoprime. */
+
+   /* Initialize 2*Q^(d*2^r) for V_2m */
+   if ((e = mp_mul_2(&Qkdz,&Q2kdz)) != MP_OKAY) {
+      goto LBL_LS_ERR;
+   }
+
+   for (r = 1; r < s; r++) {
+      if ((e = mp_sqr(&Vz,&Vz)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      if ((e = mp_sub(&Vz,&Q2kdz,&Vz)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      if ((e = mp_mod(&Vz,a,&Vz)) != MP_OKAY) {
+         goto LBL_LS_ERR;
+      }
+      if (mp_iszero(&Vz)) {
+         *result = MP_YES;
+         goto LBL_LS_ERR;
+      }
+      /* Calculate Q^{d*2^r} for next r (final iteration irrelevant). */
+      if (r < s - 1) {
+         if ((e = mp_sqr(&Qkdz,&Qkdz)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+         if ((e = mp_mod(&Qkdz,a,&Qkdz)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+         if ((e = mp_mul_2(&Qkdz,&Q2kdz)) != MP_OKAY) {
+            goto LBL_LS_ERR;
+         }
+      }
+   }
+LBL_LS_ERR:
+   mp_clear_multi(&Dz, &gcd, &Np1, &Uz, &Vz, &U2mz, &V2mz, &Qmz, &Q2mz, &Qkdz, &T1z, &T2z, &T3z, &T4z, &Q2kdz, NULL);
+   return e;
+}
+
+#endif