added libtommath-0.18
2
bn.tex
@ -1,7 +1,7 @@
|
||||
\documentclass[]{article}
|
||||
\begin{document}
|
||||
|
||||
\title{LibTomMath v0.17 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org }
|
||||
\title{LibTomMath v0.18 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org }
|
||||
\author{Tom St Denis \\ tomstdenis@iahu.ca}
|
||||
\maketitle
|
||||
\newpage
|
||||
|
@ -14,7 +14,7 @@
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* computes xR^-1 == x (mod N) via Montgomery Reduction
|
||||
/* computes xR**-1 == x (mod N) via Montgomery Reduction
|
||||
*
|
||||
* This is an optimized implementation of mp_montgomery_reduce
|
||||
* which uses the comba method to quickly calculate the columns of the
|
||||
@ -23,76 +23,77 @@
|
||||
* Based on Algorithm 14.32 on pp.601 of HAC.
|
||||
*/
|
||||
int
|
||||
fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
|
||||
fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
|
||||
{
|
||||
int ix, res, olduse;
|
||||
mp_word W[MP_WARRAY];
|
||||
|
||||
/* get old used count */
|
||||
olduse = a->used;
|
||||
olduse = x->used;
|
||||
|
||||
/* grow a as required */
|
||||
if (a->alloc < m->used + 1) {
|
||||
if ((res = mp_grow (a, m->used + 1)) != MP_OKAY) {
|
||||
if (x->alloc < n->used + 1) {
|
||||
if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
register mp_word *_W;
|
||||
register mp_digit *tmpa;
|
||||
register mp_digit *tmpx;
|
||||
|
||||
_W = W;
|
||||
tmpa = a->dp;
|
||||
tmpx = x->dp;
|
||||
|
||||
/* copy the digits of a into W[0..a->used-1] */
|
||||
for (ix = 0; ix < a->used; ix++) {
|
||||
*_W++ = *tmpa++;
|
||||
for (ix = 0; ix < x->used; ix++) {
|
||||
*_W++ = *tmpx++;
|
||||
}
|
||||
|
||||
/* zero the high words of W[a->used..m->used*2] */
|
||||
for (; ix < m->used * 2 + 1; ix++) {
|
||||
for (; ix < n->used * 2 + 1; ix++) {
|
||||
*_W++ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (ix = 0; ix < m->used; ix++) {
|
||||
/* ui = ai * m' mod b
|
||||
for (ix = 0; ix < n->used; ix++) {
|
||||
/* mu = ai * m' mod b
|
||||
*
|
||||
* We avoid a double precision multiplication (which isn't required)
|
||||
* by casting the value down to a mp_digit. Note this requires that W[ix-1] have
|
||||
* the carry cleared (see after the inner loop)
|
||||
* by casting the value down to a mp_digit. Note this requires
|
||||
* that W[ix-1] have the carry cleared (see after the inner loop)
|
||||
*/
|
||||
register mp_digit ui;
|
||||
ui = (((mp_digit) (W[ix] & MP_MASK)) * mp) & MP_MASK;
|
||||
register mp_digit mu;
|
||||
mu = (((mp_digit) (W[ix] & MP_MASK)) * rho) & MP_MASK;
|
||||
|
||||
/* a = a + ui * m * b^i
|
||||
/* a = a + mu * m * b**i
|
||||
*
|
||||
* This is computed in place and on the fly. The multiplication
|
||||
* by b^i is handled by offseting which columns the results
|
||||
* by b**i is handled by offseting which columns the results
|
||||
* are added to.
|
||||
*
|
||||
* Note the comba method normally doesn't handle carries in the inner loop
|
||||
* In this case we fix the carry from the previous column since the Montgomery
|
||||
* reduction requires digits of the result (so far) [see above] to work. This is
|
||||
* handled by fixing up one carry after the inner loop. The carry fixups are done
|
||||
* in order so after these loops the first m->used words of W[] have the carries
|
||||
* fixed
|
||||
* Note the comba method normally doesn't handle carries in the
|
||||
* inner loop In this case we fix the carry from the previous
|
||||
* column since the Montgomery reduction requires digits of the
|
||||
* result (so far) [see above] to work. This is
|
||||
* handled by fixing up one carry after the inner loop. The
|
||||
* carry fixups are done in order so after these loops the
|
||||
* first m->used words of W[] have the carries fixed
|
||||
*/
|
||||
{
|
||||
register int iy;
|
||||
register mp_digit *tmpx;
|
||||
register mp_digit *tmpn;
|
||||
register mp_word *_W;
|
||||
|
||||
/* alias for the digits of the modulus */
|
||||
tmpx = m->dp;
|
||||
tmpn = n->dp;
|
||||
|
||||
/* Alias for the columns set by an offset of ix */
|
||||
_W = W + ix;
|
||||
|
||||
/* inner loop */
|
||||
for (iy = 0; iy < m->used; iy++) {
|
||||
*_W++ += ((mp_word) ui) * ((mp_word) * tmpx++);
|
||||
for (iy = 0; iy < n->used; iy++) {
|
||||
*_W++ += ((mp_word) mu) * ((mp_word) * tmpn++);
|
||||
}
|
||||
}
|
||||
|
||||
@ -102,44 +103,44 @@ fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
|
||||
|
||||
|
||||
{
|
||||
register mp_digit *tmpa;
|
||||
register mp_digit *tmpx;
|
||||
register mp_word *_W, *_W1;
|
||||
|
||||
/* nox fix rest of carries */
|
||||
_W1 = W + ix;
|
||||
_W = W + ++ix;
|
||||
|
||||
for (; ix <= m->used * 2 + 1; ix++) {
|
||||
for (; ix <= n->used * 2 + 1; ix++) {
|
||||
*_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
|
||||
}
|
||||
|
||||
/* copy out, A = A/b^n
|
||||
/* copy out, A = A/b**n
|
||||
*
|
||||
* The result is A/b^n but instead of converting from an array of mp_word
|
||||
* to mp_digit than calling mp_rshd we just copy them in the right
|
||||
* order
|
||||
* The result is A/b**n but instead of converting from an
|
||||
* array of mp_word to mp_digit than calling mp_rshd
|
||||
* we just copy them in the right order
|
||||
*/
|
||||
tmpa = a->dp;
|
||||
_W = W + m->used;
|
||||
tmpx = x->dp;
|
||||
_W = W + n->used;
|
||||
|
||||
for (ix = 0; ix < m->used + 1; ix++) {
|
||||
*tmpa++ = *_W++ & ((mp_word) MP_MASK);
|
||||
for (ix = 0; ix < n->used + 1; ix++) {
|
||||
*tmpx++ = *_W++ & ((mp_word) MP_MASK);
|
||||
}
|
||||
|
||||
/* zero oldused digits, if the input a was larger than
|
||||
* m->used+1 we'll have to clear the digits */
|
||||
for (; ix < olduse; ix++) {
|
||||
*tmpa++ = 0;
|
||||
*tmpx++ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* set the max used and clamp */
|
||||
a->used = m->used + 1;
|
||||
mp_clamp (a);
|
||||
x->used = n->used + 1;
|
||||
mp_clamp (x);
|
||||
|
||||
/* if A >= m then A = A - m */
|
||||
if (mp_cmp_mag (a, m) != MP_LT) {
|
||||
return s_mp_sub (a, m, a);
|
||||
if (mp_cmp_mag (x, n) != MP_LT) {
|
||||
return s_mp_sub (x, n, x);
|
||||
}
|
||||
return MP_OKAY;
|
||||
}
|
||||
|
@ -16,15 +16,17 @@
|
||||
|
||||
/* fast squaring
|
||||
*
|
||||
* This is the comba method where the columns of the product are computed first
|
||||
* then the carries are computed. This has the effect of making a very simple
|
||||
* inner loop that is executed the most
|
||||
* This is the comba method where the columns of the product
|
||||
* are computed first then the carries are computed. This
|
||||
* has the effect of making a very simple inner loop that
|
||||
* is executed the most
|
||||
*
|
||||
* W2 represents the outer products and W the inner.
|
||||
*
|
||||
* A further optimizations is made because the inner products are of the form
|
||||
* "A * B * 2". The *2 part does not need to be computed until the end which is
|
||||
* good because 64-bit shifts are slow!
|
||||
* A further optimizations is made because the inner
|
||||
* products are of the form "A * B * 2". The *2 part does
|
||||
* not need to be computed until the end which is good
|
||||
* because 64-bit shifts are slow!
|
||||
*
|
||||
* Based on Algorithm 14.16 on pp.597 of HAC.
|
||||
*
|
||||
@ -48,26 +50,15 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
|
||||
* Note that there are two buffers. Since squaring requires
|
||||
* a outter and inner product and the inner product requires
|
||||
* computing a product and doubling it (a relatively expensive
|
||||
* op to perform n^2 times if you don't have to) the inner and
|
||||
* op to perform n**2 times if you don't have to) the inner and
|
||||
* outer products are computed in different buffers. This way
|
||||
* the inner product can be doubled using n doublings instead of
|
||||
* n^2
|
||||
* n**2
|
||||
*/
|
||||
memset (W, 0, newused * sizeof (mp_word));
|
||||
memset (W2, 0, newused * sizeof (mp_word));
|
||||
|
||||
/* note optimization
|
||||
* values in W2 are only written in even locations which means
|
||||
* we can collapse the array to 256 words [and fixup the memset above]
|
||||
* provided we also fix up the summations below. Ideally
|
||||
* the fixup loop should be unrolled twice to handle the even/odd
|
||||
* cases, and then a final step to handle odd cases [e.g. newused == odd]
|
||||
*
|
||||
* This will not only save ~8*256 = 2KB of stack but lower the number of
|
||||
* operations required to finally fix up the columns
|
||||
*/
|
||||
|
||||
/* This computes the inner product. To simplify the inner N^2 loop
|
||||
/* This computes the inner product. To simplify the inner N**2 loop
|
||||
* the multiplication by two is done afterwards in the N loop.
|
||||
*/
|
||||
for (ix = 0; ix < pa; ix++) {
|
||||
@ -101,18 +92,19 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
|
||||
}
|
||||
|
||||
/* setup dest */
|
||||
olduse = b->used;
|
||||
olduse = b->used;
|
||||
b->used = newused;
|
||||
|
||||
/* double first value, since the inner products are half of what they should be */
|
||||
W[0] += W[0] + W2[0];
|
||||
|
||||
/* now compute digits */
|
||||
{
|
||||
register mp_digit *tmpb;
|
||||
|
||||
tmpb = b->dp;
|
||||
/* double first value, since the inner products are
|
||||
* half of what they should be
|
||||
*/
|
||||
W[0] += W[0] + W2[0];
|
||||
|
||||
tmpb = b->dp;
|
||||
for (ix = 1; ix < newused; ix++) {
|
||||
/* double/add next digit */
|
||||
W[ix] += W[ix] + W2[ix];
|
||||
@ -120,9 +112,13 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
|
||||
W[ix] = W[ix] + (W[ix - 1] >> ((mp_word) DIGIT_BIT));
|
||||
*tmpb++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
|
||||
}
|
||||
/* set the last value. Note even if the carry is zero
|
||||
* this is required since the next step will not zero
|
||||
* it if b originally had a value at b->dp[2*a.used]
|
||||
*/
|
||||
*tmpb++ = (mp_digit) (W[(newused) - 1] & ((mp_word) MP_MASK));
|
||||
|
||||
/* clear high */
|
||||
/* clear high digits */
|
||||
for (; ix < olduse; ix++) {
|
||||
*tmpb++ = 0;
|
||||
}
|
||||
|
@ -14,7 +14,7 @@
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* computes a = 2^b
|
||||
/* computes a = 2**b
|
||||
*
|
||||
* Simple algorithm which zeroes the int, grows it then just sets one bit
|
||||
* as required.
|
||||
|
@ -21,7 +21,7 @@ mp_copy (mp_int * a, mp_int * b)
|
||||
int res, n;
|
||||
|
||||
/* if dst == src do nothing */
|
||||
if (a == b || a->dp == b->dp) {
|
||||
if (a == b) {
|
||||
return MP_OKAY;
|
||||
}
|
||||
|
||||
|
@ -21,11 +21,15 @@ mp_count_bits (mp_int * a)
|
||||
int r;
|
||||
mp_digit q;
|
||||
|
||||
/* shortcut */
|
||||
if (a->used == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* get number of digits and add that */
|
||||
r = (a->used - 1) * DIGIT_BIT;
|
||||
|
||||
/* take the last digit and count the bits in it */
|
||||
q = a->dp[a->used - 1];
|
||||
while (q > ((mp_digit) 0)) {
|
||||
++r;
|
||||
|
@ -14,7 +14,7 @@
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* shift right by a certain bit count (store quotient in c, remainder in d) */
|
||||
/* shift right by a certain bit count (store quotient in c, optional remainder in d) */
|
||||
int
|
||||
mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
|
||||
{
|
||||
@ -81,7 +81,6 @@ mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
|
||||
}
|
||||
}
|
||||
mp_clamp (c);
|
||||
res = MP_OKAY;
|
||||
if (d != NULL) {
|
||||
mp_exch (&t, d);
|
||||
}
|
||||
|
64
bn_mp_div_3.c
Normal file
@ -0,0 +1,64 @@
|
||||
/* LibTomMath, multiple-precision integer library -- Tom St Denis
|
||||
*
|
||||
* LibTomMath is library that provides for multiple-precision
|
||||
* integer arithmetic as well as number theoretic functionality.
|
||||
*
|
||||
* The library is designed directly after the MPI library by
|
||||
* Michael Fromberger but has been written from scratch with
|
||||
* additional optimizations in place.
|
||||
*
|
||||
* The library is free for all purposes without any express
|
||||
* guarantee it works.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* divide by three (based on routine from MPI and the GMP manual) */
|
||||
int
|
||||
mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
|
||||
{
|
||||
mp_int q;
|
||||
mp_word w, t;
|
||||
mp_digit b;
|
||||
int res, ix;
|
||||
|
||||
/* b = 2**DIGIT_BIT / 3 */
|
||||
b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
|
||||
|
||||
if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
|
||||
q.used = a->used;
|
||||
q.sign = a->sign;
|
||||
w = 0;
|
||||
for (ix = a->used - 1; ix >= 0; ix--) {
|
||||
w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
|
||||
|
||||
if (w >= 3) {
|
||||
t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
|
||||
w -= (t << ((mp_word)1)) + t;
|
||||
while (w >= 3) {
|
||||
t += 1;
|
||||
w -= 3;
|
||||
}
|
||||
} else {
|
||||
t = 0;
|
||||
}
|
||||
q.dp[ix] = t;
|
||||
}
|
||||
|
||||
if (d != NULL) {
|
||||
*d = w;
|
||||
}
|
||||
|
||||
if (c != NULL) {
|
||||
mp_clamp(&q);
|
||||
mp_exch(&q, c);
|
||||
}
|
||||
mp_clear(&q);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
@ -14,31 +14,51 @@
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* single digit division */
|
||||
/* single digit division (based on routine from MPI) */
|
||||
int
|
||||
mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
|
||||
{
|
||||
mp_int t, t2;
|
||||
int res;
|
||||
|
||||
if ((res = mp_init (&t)) != MP_OKAY) {
|
||||
return res;
|
||||
mp_int q;
|
||||
mp_word w, t;
|
||||
int res, ix;
|
||||
|
||||
if (b == 0) {
|
||||
return MP_VAL;
|
||||
}
|
||||
|
||||
if ((res = mp_init (&t2)) != MP_OKAY) {
|
||||
mp_clear (&t);
|
||||
return res;
|
||||
|
||||
if (b == 3) {
|
||||
return mp_div_3(a, c, d);
|
||||
}
|
||||
|
||||
mp_set (&t, b);
|
||||
res = mp_div (a, &t, c, &t2);
|
||||
|
||||
/* set remainder if not null */
|
||||
|
||||
if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
|
||||
q.used = a->used;
|
||||
q.sign = a->sign;
|
||||
w = 0;
|
||||
for (ix = a->used - 1; ix >= 0; ix--) {
|
||||
w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
|
||||
|
||||
if (w >= b) {
|
||||
t = w / b;
|
||||
w = w % b;
|
||||
} else {
|
||||
t = 0;
|
||||
}
|
||||
q.dp[ix] = t;
|
||||
}
|
||||
|
||||
if (d != NULL) {
|
||||
*d = t2.dp[0];
|
||||
*d = w;
|
||||
}
|
||||
|
||||
mp_clear (&t);
|
||||
mp_clear (&t2);
|
||||
|
||||
if (c != NULL) {
|
||||
mp_clamp(&q);
|
||||
mp_exch(&q, c);
|
||||
}
|
||||
mp_clear(&q);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* reduce "a" in place modulo "b" using the Diminished Radix algorithm.
|
||||
/* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
|
||||
*
|
||||
* Based on algorithm from the paper
|
||||
*
|
||||
@ -23,107 +23,64 @@
|
||||
* POSTECH Information Research Laboratories
|
||||
*
|
||||
* The modulus must be of a special format [see manual]
|
||||
*
|
||||
* Has been modified to use algorithm 7.10 from the LTM book instead
|
||||
*/
|
||||
int
|
||||
mp_dr_reduce (mp_int * a, mp_int * b, mp_digit mp)
|
||||
mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
|
||||
{
|
||||
int err, i, j, k;
|
||||
mp_word r;
|
||||
mp_digit mu, *tmpj, *tmpi;
|
||||
|
||||
/* k = digits in modulus */
|
||||
k = b->used;
|
||||
|
||||
/* ensure that "a" has at least 2k digits */
|
||||
if (a->alloc < k + k) {
|
||||
if ((err = mp_grow (a, k + k)) != MP_OKAY) {
|
||||
int err, i, m;
|
||||
mp_word r;
|
||||
mp_digit mu, *tmpx1, *tmpx2;
|
||||
|
||||
/* m = digits in modulus */
|
||||
m = n->used;
|
||||
|
||||
/* ensure that "x" has at least 2m digits */
|
||||
if (x->alloc < m + m) {
|
||||
if ((err = mp_grow (x, m + m)) != MP_OKAY) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
/* alias for a->dp[i] */
|
||||
tmpi = a->dp + k + k - 1;
|
||||
|
||||
/* for (i = 2k - 1; i >= k; i = i - 1)
|
||||
*
|
||||
* This is the main loop of the reduction. Note that at the end
|
||||
* the words above position k are not zeroed as expected. The end
|
||||
* result is that the digits from 0 to k-1 are the residue. So
|
||||
* we have to clear those afterwards.
|
||||
*/
|
||||
for (i = k + k - 1; i >= k; i = i - 1) {
|
||||
/* x[i - 1 : i - k] += x[i]*mp */
|
||||
|
||||
/* x[i] * mp */
|
||||
r = ((mp_word) *tmpi--) * ((mp_word) mp);
|
||||
|
||||
/* now add r to x[i-1:i-k]
|
||||
*
|
||||
* First add it to the first digit x[i-k] then form the carry
|
||||
* then enter the main loop
|
||||
*/
|
||||
j = i - k;
|
||||
|
||||
/* alias for a->dp[j] */
|
||||
tmpj = a->dp + j;
|
||||
|
||||
/* add digit */
|
||||
*tmpj += (mp_digit)(r & MP_MASK);
|
||||
|
||||
/* this is the carry */
|
||||
mu = (r >> ((mp_word) DIGIT_BIT)) + (*tmpj >> DIGIT_BIT);
|
||||
|
||||
/* clear carry from a->dp[j] */
|
||||
*tmpj++ &= MP_MASK;
|
||||
|
||||
/* now add rest of the digits
|
||||
*
|
||||
* Note this is basically a simple single digit addition to
|
||||
* a larger multiple digit number. This is optimized somewhat
|
||||
* because the propagation of carries is not likely to move
|
||||
* more than a few digits.
|
||||
*
|
||||
*/
|
||||
for (++j; mu != 0 && j <= (i - 1); ++j) {
|
||||
*tmpj += mu;
|
||||
mu = *tmpj >> DIGIT_BIT;
|
||||
*tmpj++ &= MP_MASK;
|
||||
}
|
||||
|
||||
/* if final carry */
|
||||
if (mu != 0) {
|
||||
/* add mp to this to correct */
|
||||
j = i - k;
|
||||
tmpj = a->dp + j;
|
||||
|
||||
*tmpj += mp;
|
||||
mu = *tmpj >> DIGIT_BIT;
|
||||
*tmpj++ &= MP_MASK;
|
||||
|
||||
/* now handle carries */
|
||||
for (++j; mu != 0 && j <= (i - 1); j++) {
|
||||
*tmpj += mu;
|
||||
mu = *tmpj >> DIGIT_BIT;
|
||||
*tmpj++ &= MP_MASK;
|
||||
}
|
||||
}
|
||||
/* top of loop, this is where the code resumes if
|
||||
* another reduction pass is required.
|
||||
*/
|
||||
top:
|
||||
/* aliases for digits */
|
||||
/* alias for lower half of x */
|
||||
tmpx1 = x->dp;
|
||||
|
||||
/* alias for upper half of x, or x/B**m */
|
||||
tmpx2 = x->dp + m;
|
||||
|
||||
/* set carry to zero */
|
||||
mu = 0;
|
||||
|
||||
/* compute (x mod B**m) + mp * [x/B**m] inline and inplace */
|
||||
for (i = 0; i < m; i++) {
|
||||
r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
|
||||
*tmpx1++ = r & MP_MASK;
|
||||
mu = r >> ((mp_word)DIGIT_BIT);
|
||||
}
|
||||
|
||||
/* zero words above k */
|
||||
tmpi = a->dp + k;
|
||||
for (i = k; i < a->used; i++) {
|
||||
*tmpi++ = 0;
|
||||
|
||||
/* set final carry */
|
||||
*tmpx1++ = mu;
|
||||
|
||||
/* zero words above m */
|
||||
for (i = m + 1; i < x->used; i++) {
|
||||
*tmpx1++ = 0;
|
||||
}
|
||||
|
||||
/* clamp, sub and return */
|
||||
mp_clamp (a);
|
||||
mp_clamp (x);
|
||||
|
||||
/* if a >= b [b == modulus] then subtract the modulus to fix up */
|
||||
if (mp_cmp_mag (a, b) != MP_LT) {
|
||||
return s_mp_sub (a, b, a);
|
||||
/* if x >= n then subtract and reduce again
|
||||
* Each successive "recursion" makes the input smaller and smaller.
|
||||
*/
|
||||
if (mp_cmp_mag (x, n) != MP_LT) {
|
||||
s_mp_sub(x, n, x);
|
||||
goto top;
|
||||
}
|
||||
return MP_OKAY;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -20,6 +20,7 @@ void mp_dr_setup(mp_int *a, mp_digit *d)
|
||||
/* the casts are required if DIGIT_BIT is one less than
|
||||
* the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
|
||||
*/
|
||||
*d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - ((mp_word)a->dp[0]));
|
||||
*d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
|
||||
((mp_word)a->dp[0]));
|
||||
}
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* calculate c = a^b using a square-multiply algorithm */
|
||||
/* calculate c = a**b using a square-multiply algorithm */
|
||||
int
|
||||
mp_expt_d (mp_int * a, mp_digit b, mp_int * c)
|
||||
{
|
||||
|
205
bn_mp_exptmod.c
@ -14,7 +14,6 @@
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
static int f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y);
|
||||
|
||||
/* this is a shell function that calls either the normal or Montgomery
|
||||
* exptmod functions. Originally the call to the montgomery code was
|
||||
@ -55,212 +54,22 @@ mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
|
||||
return err;
|
||||
}
|
||||
|
||||
/* and now compute (1/G)^|X| instead of G^X [X < 0] */
|
||||
/* and now compute (1/G)**|X| instead of G**X [X < 0] */
|
||||
err = mp_exptmod(&tmpG, &tmpX, P, Y);
|
||||
mp_clear_multi(&tmpG, &tmpX, NULL);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
dr = mp_dr_is_modulus(P);
|
||||
if (dr == 0) {
|
||||
dr = mp_reduce_is_2k(P) << 1;
|
||||
}
|
||||
|
||||
/* if the modulus is odd use the fast method */
|
||||
if ((mp_isodd (P) == 1 || dr == 1) && P->used > 4) {
|
||||
if ((mp_isodd (P) == 1 || dr != 0) && P->used > 4) {
|
||||
return mp_exptmod_fast (G, X, P, Y, dr);
|
||||
} else {
|
||||
return f_mp_exptmod (G, X, P, Y);
|
||||
return s_mp_exptmod (G, X, P, Y);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
|
||||
{
|
||||
mp_int M[256], res, mu;
|
||||
mp_digit buf;
|
||||
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
|
||||
|
||||
/* find window size */
|
||||
x = mp_count_bits (X);
|
||||
if (x <= 7) {
|
||||
winsize = 2;
|
||||
} else if (x <= 36) {
|
||||
winsize = 3;
|
||||
} else if (x <= 140) {
|
||||
winsize = 4;
|
||||
} else if (x <= 450) {
|
||||
winsize = 5;
|
||||
} else if (x <= 1303) {
|
||||
winsize = 6;
|
||||
} else if (x <= 3529) {
|
||||
winsize = 7;
|
||||
} else {
|
||||
winsize = 8;
|
||||
}
|
||||
|
||||
#ifdef MP_LOW_MEM
|
||||
if (winsize > 5) {
|
||||
winsize = 5;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* init G array */
|
||||
for (x = 0; x < (1 << winsize); x++) {
|
||||
if ((err = mp_init_size (&M[x], 1)) != MP_OKAY) {
|
||||
for (y = 0; y < x; y++) {
|
||||
mp_clear (&M[y]);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
/* create mu, used for Barrett reduction */
|
||||
if ((err = mp_init (&mu)) != MP_OKAY) {
|
||||
goto __M;
|
||||
}
|
||||
if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
|
||||
/* create M table
|
||||
*
|
||||
* The M table contains powers of the input base, e.g. M[x] = G^x mod P
|
||||
*
|
||||
* The first half of the table is not computed though accept for M[0] and M[1]
|
||||
*/
|
||||
if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
|
||||
/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
|
||||
if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
|
||||
for (x = 0; x < (winsize - 1); x++) {
|
||||
if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
}
|
||||
|
||||
/* create upper table */
|
||||
for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
|
||||
if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
}
|
||||
|
||||
/* setup result */
|
||||
if ((err = mp_init (&res)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
mp_set (&res, 1);
|
||||
|
||||
/* set initial mode and bit cnt */
|
||||
mode = 0;
|
||||
bitcnt = 1;
|
||||
buf = 0;
|
||||
digidx = X->used - 1;
|
||||
bitcpy = bitbuf = 0;
|
||||
|
||||
for (;;) {
|
||||
/* grab next digit as required */
|
||||
if (--bitcnt == 0) {
|
||||
if (digidx == -1) {
|
||||
break;
|
||||
}
|
||||
buf = X->dp[digidx--];
|
||||
bitcnt = (int) DIGIT_BIT;
|
||||
}
|
||||
|
||||
/* grab the next msb from the exponent */
|
||||
y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
|
||||
buf <<= (mp_digit)1;
|
||||
|
||||
/* if the bit is zero and mode == 0 then we ignore it
|
||||
* These represent the leading zero bits before the first 1 bit
|
||||
* in the exponent. Technically this opt is not required but it
|
||||
* does lower the # of trivial squaring/reductions used
|
||||
*/
|
||||
if (mode == 0 && y == 0)
|
||||
continue;
|
||||
|
||||
/* if the bit is zero and mode == 1 then we square */
|
||||
if (mode == 1 && y == 0) {
|
||||
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* else we add it to the window */
|
||||
bitbuf |= (y << (winsize - ++bitcpy));
|
||||
mode = 2;
|
||||
|
||||
if (bitcpy == winsize) {
|
||||
/* ok window is filled so square as required and multiply */
|
||||
/* square first */
|
||||
for (x = 0; x < winsize; x++) {
|
||||
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
}
|
||||
|
||||
/* then multiply */
|
||||
if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
|
||||
/* empty window and reset */
|
||||
bitcpy = bitbuf = 0;
|
||||
mode = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* if bits remain then square/multiply */
|
||||
if (mode == 2 && bitcpy > 0) {
|
||||
/* square then multiply if the bit is set */
|
||||
for (x = 0; x < bitcpy; x++) {
|
||||
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
|
||||
bitbuf <<= 1;
|
||||
if ((bitbuf & (1 << winsize)) != 0) {
|
||||
/* then multiply */
|
||||
if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mp_exch (&res, Y);
|
||||
err = MP_OKAY;
|
||||
__RES:mp_clear (&res);
|
||||
__MU:mp_clear (&mu);
|
||||
__M:
|
||||
for (x = 0; x < (1 << winsize); x++) {
|
||||
mp_clear (&M[x]);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
@ -27,6 +27,11 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
mp_int M[256], res;
|
||||
mp_digit buf, mp;
|
||||
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
|
||||
|
||||
/* use a pointer to the reduction algorithm. This allows us to use
|
||||
* one of many reduction algorithms without modding the guts of
|
||||
* the code with if statements everywhere.
|
||||
*/
|
||||
int (*redux)(mp_int*,mp_int*,mp_digit);
|
||||
|
||||
/* find window size */
|
||||
@ -64,6 +69,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
}
|
||||
}
|
||||
|
||||
/* determine and setup reduction code */
|
||||
if (redmode == 0) {
|
||||
/* now setup montgomery */
|
||||
if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
|
||||
@ -71,17 +77,23 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
}
|
||||
|
||||
/* automatically pick the comba one if available (saves quite a few calls/ifs) */
|
||||
if ( ((P->used * 2 + 1) < MP_WARRAY) &&
|
||||
if (((P->used * 2 + 1) < MP_WARRAY) &&
|
||||
P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
|
||||
redux = fast_mp_montgomery_reduce;
|
||||
} else {
|
||||
/* use slower baselien method */
|
||||
redux = mp_montgomery_reduce;
|
||||
}
|
||||
} else {
|
||||
} else if (redmode == 1) {
|
||||
/* setup DR reduction */
|
||||
mp_dr_setup(P, &mp);
|
||||
redux = mp_dr_reduce;
|
||||
} else {
|
||||
/* setup 2k reduction */
|
||||
if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
|
||||
goto __M;
|
||||
}
|
||||
redux = mp_reduce_2k;
|
||||
}
|
||||
|
||||
/* setup result */
|
||||
@ -142,7 +154,8 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
bitcnt = 1;
|
||||
buf = 0;
|
||||
digidx = X->used - 1;
|
||||
bitcpy = bitbuf = 0;
|
||||
bitcpy = 0;
|
||||
bitbuf = 0;
|
||||
|
||||
for (;;) {
|
||||
/* grab next digit as required */
|
||||
@ -203,7 +216,8 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
}
|
||||
|
||||
/* empty window and reset */
|
||||
bitcpy = bitbuf = 0;
|
||||
bitcpy = 0;
|
||||
bitbuf = 0;
|
||||
mode = 1;
|
||||
}
|
||||
}
|
||||
@ -233,7 +247,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
}
|
||||
|
||||
if (redmode == 0) {
|
||||
/* fixup result */
|
||||
/* fixup result if Montgomery reduction is used */
|
||||
if ((err = mp_montgomery_reduce (&res, P, mp)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
|
@ -24,7 +24,7 @@ mp_init (mp_int * a)
|
||||
return MP_MEM;
|
||||
}
|
||||
|
||||
/* set the used to zero, allocated digit to the default precision
|
||||
/* set the used to zero, allocated digits to the default precision
|
||||
* and sign to positive */
|
||||
a->used = 0;
|
||||
a->alloc = MP_PREC;
|
||||
|
@ -14,24 +14,34 @@
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* c = |a| * |b| using Karatsuba Multiplication using three half size multiplications
|
||||
/* c = |a| * |b| using Karatsuba Multiplication using
|
||||
* three half size multiplications
|
||||
*
|
||||
* Let B represent the radix [e.g. 2**DIGIT_BIT] and let n represent half of the number of digits in the min(a,b)
|
||||
* Let B represent the radix [e.g. 2**DIGIT_BIT] and
|
||||
* let n represent half of the number of digits in
|
||||
* the min(a,b)
|
||||
*
|
||||
* a = a1 * B^n + a0
|
||||
* b = b1 * B^n + b0
|
||||
* a = a1 * B**n + a0
|
||||
* b = b1 * B**n + b0
|
||||
*
|
||||
* Then, a * b => a1b1 * B^2n + ((a1 - b1)(a0 - b0) + a0b0 + a1b1) * B + a0b0
|
||||
* Then, a * b =>
|
||||
a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0
|
||||
*
|
||||
* Note that a1b1 and a0b0 are used twice and only need to be computed once. So in total
|
||||
* three half size (half # of digit) multiplications are performed, a0b0, a1b1 and (a1-b1)(a0-b0)
|
||||
* Note that a1b1 and a0b0 are used twice and only need to be
|
||||
* computed once. So in total three half size (half # of
|
||||
* digit) multiplications are performed, a0b0, a1b1 and
|
||||
* (a1-b1)(a0-b0)
|
||||
*
|
||||
* Note that a multiplication of half the digits requires 1/4th the number of single precision
|
||||
* multiplications so in total after one call 25% of the single precision multiplications are saved.
|
||||
* Note also that the call to mp_mul can end up back in this function if the a0, a1, b0, or b1 are above
|
||||
* the threshold. This is known as divide-and-conquer and leads to the famous O(N^lg(3)) or O(N^1.584) work which
|
||||
* is asymptopically lower than the standard O(N^2) that the baseline/comba methods use. Generally though the
|
||||
* overhead of this method doesn't pay off until a certain size (N ~ 80) is reached.
|
||||
* Note that a multiplication of half the digits requires
|
||||
* 1/4th the number of single precision multiplications so in
|
||||
* total after one call 25% of the single precision multiplications
|
||||
* are saved. Note also that the call to mp_mul can end up back
|
||||
* in this function if the a0, a1, b0, or b1 are above the threshold.
|
||||
* This is known as divide-and-conquer and leads to the famous
|
||||
* O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
|
||||
* the standard O(N**2) that the baseline/comba methods use.
|
||||
* Generally though the overhead of this method doesn't pay off
|
||||
* until a certain size (N ~ 80) is reached.
|
||||
*/
|
||||
int
|
||||
mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
|
||||
@ -101,14 +111,15 @@ mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
|
||||
}
|
||||
}
|
||||
|
||||
/* only need to clamp the lower words since by definition the upper words x1/y1 must
|
||||
* have a known number of digits
|
||||
/* only need to clamp the lower words since by definition the
|
||||
* upper words x1/y1 must have a known number of digits
|
||||
*/
|
||||
mp_clamp (&x0);
|
||||
mp_clamp (&y0);
|
||||
|
||||
/* now calc the products x0y0 and x1y1 */
|
||||
if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) /* after this x0 is no longer required, free temp [x0==t2]! */
|
||||
/* after this x0 is no longer required, free temp [x0==t2]! */
|
||||
if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
|
||||
goto X1Y1; /* x0y0 = x0*y0 */
|
||||
if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
|
||||
goto X1Y1; /* x1y1 = x1*y1 */
|
||||
|
@ -14,10 +14,12 @@
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* Karatsuba squaring, computes b = a*a using three half size squarings
|
||||
/* Karatsuba squaring, computes b = a*a using three
|
||||
* half size squarings
|
||||
*
|
||||
* See comments of mp_karatsuba_mul for details. It is essentially the same algorithm
|
||||
* but merely tuned to perform recursive squarings.
|
||||
* See comments of mp_karatsuba_mul for details. It
|
||||
* is essentially the same algorithm but merely
|
||||
* tuned to perform recursive squarings.
|
||||
*/
|
||||
int
|
||||
mp_karatsuba_sqr (mp_int * a, mp_int * b)
|
||||
@ -74,32 +76,32 @@ mp_karatsuba_sqr (mp_int * a, mp_int * b)
|
||||
|
||||
/* now calc the products x0*x0 and x1*x1 */
|
||||
if (mp_sqr (&x0, &x0x0) != MP_OKAY)
|
||||
goto X1X1; /* x0x0 = x0*x0 */
|
||||
goto X1X1; /* x0x0 = x0*x0 */
|
||||
if (mp_sqr (&x1, &x1x1) != MP_OKAY)
|
||||
goto X1X1; /* x1x1 = x1*x1 */
|
||||
goto X1X1; /* x1x1 = x1*x1 */
|
||||
|
||||
/* now calc (x1-x0)^2 */
|
||||
/* now calc (x1-x0)**2 */
|
||||
if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
|
||||
goto X1X1; /* t1 = x1 - x0 */
|
||||
goto X1X1; /* t1 = x1 - x0 */
|
||||
if (mp_sqr (&t1, &t1) != MP_OKAY)
|
||||
goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
|
||||
goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
|
||||
|
||||
/* add x0y0 */
|
||||
if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
|
||||
goto X1X1; /* t2 = x0y0 + x1y1 */
|
||||
goto X1X1; /* t2 = x0x0 + x1x1 */
|
||||
if (mp_sub (&t2, &t1, &t1) != MP_OKAY)
|
||||
goto X1X1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
|
||||
goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */
|
||||
|
||||
/* shift by B */
|
||||
if (mp_lshd (&t1, B) != MP_OKAY)
|
||||
goto X1X1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
|
||||
goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
|
||||
if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
|
||||
goto X1X1; /* x1y1 = x1y1 << 2*B */
|
||||
goto X1X1; /* x1x1 = x1x1 << 2*B */
|
||||
|
||||
if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
|
||||
goto X1X1; /* t1 = x0y0 + t1 */
|
||||
goto X1X1; /* t1 = x0x0 + t1 */
|
||||
if (mp_add (&t1, &x1x1, b) != MP_OKAY)
|
||||
goto X1X1; /* t1 = x0y0 + t1 + x1y1 */
|
||||
goto X1X1; /* t1 = x0x0 + t1 + x1x1 */
|
||||
|
||||
err = MP_OKAY;
|
||||
|
||||
|
14
bn_mp_lshd.c
@ -33,29 +33,29 @@ mp_lshd (mp_int * a, int b)
|
||||
}
|
||||
|
||||
{
|
||||
register mp_digit *tmpa, *tmpaa;
|
||||
register mp_digit *top, *bottom;
|
||||
|
||||
/* increment the used by the shift amount than copy upwards */
|
||||
/* increment the used by the shift amount then copy upwards */
|
||||
a->used += b;
|
||||
|
||||
/* top */
|
||||
tmpa = a->dp + a->used - 1;
|
||||
top = a->dp + a->used - 1;
|
||||
|
||||
/* base */
|
||||
tmpaa = a->dp + a->used - 1 - b;
|
||||
bottom = a->dp + a->used - 1 - b;
|
||||
|
||||
/* much like mp_rshd this is implemented using a sliding window
|
||||
* except the window goes the otherway around. Copying from
|
||||
* the bottom to the top. see bn_mp_rshd.c for more info.
|
||||
*/
|
||||
for (x = a->used - 1; x >= b; x--) {
|
||||
*tmpa-- = *tmpaa--;
|
||||
*top-- = *bottom--;
|
||||
}
|
||||
|
||||
/* zero the lower digits */
|
||||
tmpa = a->dp;
|
||||
top = a->dp;
|
||||
for (x = 0; x < b; x++) {
|
||||
*tmpa++ = 0;
|
||||
*top++ = 0;
|
||||
}
|
||||
}
|
||||
return MP_OKAY;
|
||||
|
@ -17,31 +17,5 @@
|
||||
int
|
||||
mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
|
||||
{
|
||||
mp_int t, t2;
|
||||
int res;
|
||||
|
||||
|
||||
if ((res = mp_init (&t)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
|
||||
if ((res = mp_init (&t2)) != MP_OKAY) {
|
||||
mp_clear (&t);
|
||||
return res;
|
||||
}
|
||||
|
||||
mp_set (&t, b);
|
||||
mp_div (a, &t, NULL, &t2);
|
||||
|
||||
if (t2.sign == MP_NEG) {
|
||||
if ((res = mp_add_d (&t2, b, &t2)) != MP_OKAY) {
|
||||
mp_clear (&t);
|
||||
mp_clear (&t2);
|
||||
return res;
|
||||
}
|
||||
}
|
||||
*c = t2.dp[0];
|
||||
mp_clear (&t);
|
||||
mp_clear (&t2);
|
||||
return MP_OKAY;
|
||||
return mp_div_d(a, b, NULL, c);
|
||||
}
|
||||
|
@ -14,12 +14,12 @@
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* computes xR^-1 == x (mod N) via Montgomery Reduction */
|
||||
/* computes xR**-1 == x (mod N) via Montgomery Reduction */
|
||||
int
|
||||
mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
|
||||
mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
|
||||
{
|
||||
int ix, res, digs;
|
||||
mp_digit ui;
|
||||
mp_digit mu;
|
||||
|
||||
/* can the fast reduction [comba] method be used?
|
||||
*
|
||||
@ -27,55 +27,60 @@ mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
|
||||
* than the available columns [255 per default] since carries
|
||||
* are fixed up in the inner loop.
|
||||
*/
|
||||
digs = m->used * 2 + 1;
|
||||
if ((digs < MP_WARRAY)
|
||||
&& m->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
|
||||
return fast_mp_montgomery_reduce (a, m, mp);
|
||||
digs = n->used * 2 + 1;
|
||||
if ((digs < MP_WARRAY) &&
|
||||
n->used <
|
||||
(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
|
||||
return fast_mp_montgomery_reduce (x, n, rho);
|
||||
}
|
||||
|
||||
/* grow the input as required */
|
||||
if (a->alloc < m->used * 2 + 1) {
|
||||
if ((res = mp_grow (a, m->used * 2 + 1)) != MP_OKAY) {
|
||||
if (x->alloc < digs) {
|
||||
if ((res = mp_grow (x, digs)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
}
|
||||
a->used = m->used * 2 + 1;
|
||||
x->used = digs;
|
||||
|
||||
for (ix = 0; ix < m->used; ix++) {
|
||||
/* ui = ai * m' mod b */
|
||||
ui = (a->dp[ix] * mp) & MP_MASK;
|
||||
for (ix = 0; ix < n->used; ix++) {
|
||||
/* mu = ai * m' mod b */
|
||||
mu = (x->dp[ix] * rho) & MP_MASK;
|
||||
|
||||
/* a = a + ui * m * b^i */
|
||||
/* a = a + mu * m * b**i */
|
||||
{
|
||||
register int iy;
|
||||
register mp_digit *tmpx, *tmpy, mu;
|
||||
register mp_digit *tmpn, *tmpx, u;
|
||||
register mp_word r;
|
||||
|
||||
/* aliases */
|
||||
tmpx = m->dp;
|
||||
tmpy = a->dp + ix;
|
||||
tmpn = n->dp;
|
||||
tmpx = x->dp + ix;
|
||||
|
||||
mu = 0;
|
||||
for (iy = 0; iy < m->used; iy++) {
|
||||
r = ((mp_word) ui) * ((mp_word) * tmpx++) + ((mp_word) mu) + ((mp_word) * tmpy);
|
||||
mu = (r >> ((mp_word) DIGIT_BIT));
|
||||
*tmpy++ = (r & ((mp_word) MP_MASK));
|
||||
/* set the carry to zero */
|
||||
u = 0;
|
||||
|
||||
/* Multiply and add in place */
|
||||
for (iy = 0; iy < n->used; iy++) {
|
||||
r = ((mp_word) mu) * ((mp_word) * tmpn++) +
|
||||
((mp_word) u) + ((mp_word) * tmpx);
|
||||
u = (r >> ((mp_word) DIGIT_BIT));
|
||||
*tmpx++ = (r & ((mp_word) MP_MASK));
|
||||
}
|
||||
/* propagate carries */
|
||||
while (mu) {
|
||||
*tmpy += mu;
|
||||
mu = (*tmpy >> DIGIT_BIT) & 1;
|
||||
*tmpy++ &= MP_MASK;
|
||||
while (u) {
|
||||
*tmpx += u;
|
||||
u = *tmpx >> DIGIT_BIT;
|
||||
*tmpx++ &= MP_MASK;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* A = A/b^n */
|
||||
mp_rshd (a, m->used);
|
||||
/* x = x/b**n.used */
|
||||
mp_rshd (x, n->used);
|
||||
|
||||
/* if A >= m then A = A - m */
|
||||
if (mp_cmp_mag (a, m) != MP_LT) {
|
||||
return s_mp_sub (a, m, a);
|
||||
if (mp_cmp_mag (x, n) != MP_LT) {
|
||||
return s_mp_sub (x, n, x);
|
||||
}
|
||||
|
||||
return MP_OKAY;
|
||||
|
@ -16,38 +16,38 @@
|
||||
|
||||
/* setups the montgomery reduction stuff */
|
||||
int
|
||||
mp_montgomery_setup (mp_int * a, mp_digit * mp)
|
||||
mp_montgomery_setup (mp_int * n, mp_digit * rho)
|
||||
{
|
||||
mp_digit x, b;
|
||||
|
||||
/* fast inversion mod 2^k
|
||||
/* fast inversion mod 2**k
|
||||
*
|
||||
* Based on the fact that
|
||||
*
|
||||
* XA = 1 (mod 2^n) => (X(2-XA)) A = 1 (mod 2^2n)
|
||||
* => 2*X*A - X*X*A*A = 1
|
||||
* => 2*(1) - (1) = 1
|
||||
* XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
|
||||
* => 2*X*A - X*X*A*A = 1
|
||||
* => 2*(1) - (1) = 1
|
||||
*/
|
||||
b = a->dp[0];
|
||||
b = n->dp[0];
|
||||
|
||||
if ((b & 1) == 0) {
|
||||
return MP_VAL;
|
||||
}
|
||||
|
||||
x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2^4 */
|
||||
x *= 2 - b * x; /* here x*a==1 mod 2^8 */
|
||||
x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
|
||||
x *= 2 - b * x; /* here x*a==1 mod 2**8 */
|
||||
#if !defined(MP_8BIT)
|
||||
x *= 2 - b * x; /* here x*a==1 mod 2^16; each step doubles the nb of bits */
|
||||
x *= 2 - b * x; /* here x*a==1 mod 2**16 */
|
||||
#endif
|
||||
#if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
|
||||
x *= 2 - b * x; /* here x*a==1 mod 2^32 */
|
||||
x *= 2 - b * x; /* here x*a==1 mod 2**32 */
|
||||
#endif
|
||||
#ifdef MP_64BIT
|
||||
x *= 2 - b * x; /* here x*a==1 mod 2^64 */
|
||||
x *= 2 - b * x; /* here x*a==1 mod 2**64 */
|
||||
#endif
|
||||
|
||||
/* t = -1/m mod b */
|
||||
*mp = (((mp_digit) 1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK;
|
||||
/* rho = -1/m mod b */
|
||||
*rho = (((mp_digit) 1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK;
|
||||
|
||||
return MP_OKAY;
|
||||
}
|
||||
|
15
bn_mp_mul.c
@ -20,19 +20,24 @@ mp_mul (mp_int * a, mp_int * b, mp_int * c)
|
||||
{
|
||||
int res, neg;
|
||||
neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
|
||||
if (MIN (a->used, b->used) > KARATSUBA_MUL_CUTOFF) {
|
||||
|
||||
if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
|
||||
res = mp_toom_mul(a, b, c);
|
||||
} else if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
|
||||
res = mp_karatsuba_mul (a, b, c);
|
||||
} else {
|
||||
|
||||
/* can we use the fast multiplier?
|
||||
*
|
||||
* The fast multiplier can be used if the output will have less than
|
||||
* MP_WARRAY digits and the number of digits won't affect carry propagation
|
||||
* The fast multiplier can be used if the output will
|
||||
* have less than MP_WARRAY digits and the number of
|
||||
* digits won't affect carry propagation
|
||||
*/
|
||||
int digs = a->used + b->used + 1;
|
||||
|
||||
if ((digs < MP_WARRAY)
|
||||
&& MIN(a->used, b->used) <= (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
|
||||
if ((digs < MP_WARRAY) &&
|
||||
MIN(a->used, b->used) <=
|
||||
(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
|
||||
res = fast_s_mp_mul_digs (a, b, c, digs);
|
||||
} else {
|
||||
res = s_mp_mul (a, b, c);
|
||||
|
@ -14,22 +14,8 @@
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* pre-calculate the value required for Barrett reduction
|
||||
* For a given modulus "b" it calulates the value required in "a"
|
||||
*/
|
||||
int
|
||||
mp_reduce_setup (mp_int * a, mp_int * b)
|
||||
{
|
||||
int res;
|
||||
|
||||
if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
res = mp_div (a, b, a, NULL);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* reduces x mod m, assumes 0 < x < m^2, mu is precomputed via mp_reduce_setup
|
||||
/* reduces x mod m, assumes 0 < x < m**2, mu is
|
||||
* precomputed via mp_reduce_setup.
|
||||
* From HAC pp.604 Algorithm 14.42
|
||||
*/
|
||||
int
|
||||
@ -38,11 +24,12 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
|
||||
mp_int q;
|
||||
int res, um = m->used;
|
||||
|
||||
/* q = x */
|
||||
if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
|
||||
/* q1 = x / b^(k-1) */
|
||||
/* q1 = x / b**(k-1) */
|
||||
mp_rshd (&q, um - 1);
|
||||
|
||||
/* according to HAC this is optimization is ok */
|
||||
@ -56,15 +43,15 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
|
||||
}
|
||||
}
|
||||
|
||||
/* q3 = q2 / b^(k+1) */
|
||||
/* q3 = q2 / b**(k+1) */
|
||||
mp_rshd (&q, um + 1);
|
||||
|
||||
/* x = x mod b^(k+1), quick (no division) */
|
||||
/* x = x mod b**(k+1), quick (no division) */
|
||||
if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
/* q = q * m mod b^(k+1), quick (no division) */
|
||||
/* q = q * m mod b**(k+1), quick (no division) */
|
||||
if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
|
||||
goto CLEANUP;
|
||||
}
|
||||
@ -74,7 +61,7 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
/* If x < 0, add b^(k+1) to it */
|
||||
/* If x < 0, add b**(k+1) to it */
|
||||
if (mp_cmp_d (x, 0) == MP_LT) {
|
||||
mp_set (&q, 1);
|
||||
if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
|
||||
@ -89,7 +76,7 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
CLEANUP:
|
||||
mp_clear (&q);
|
||||
|
||||
|
56
bn_mp_reduce_2k.c
Normal file
@ -0,0 +1,56 @@
|
||||
/* LibTomMath, multiple-precision integer library -- Tom St Denis
|
||||
*
|
||||
* LibTomMath is library that provides for multiple-precision
|
||||
* integer arithmetic as well as number theoretic functionality.
|
||||
*
|
||||
* The library is designed directly after the MPI library by
|
||||
* Michael Fromberger but has been written from scratch with
|
||||
* additional optimizations in place.
|
||||
*
|
||||
* The library is free for all purposes without any express
|
||||
* guarantee it works.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* reduces a modulo n where n is of the form 2**p - k */
|
||||
int
|
||||
mp_reduce_2k(mp_int *a, mp_int *n, mp_digit k)
|
||||
{
|
||||
mp_int q;
|
||||
int p, res;
|
||||
|
||||
if ((res = mp_init(&q)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
|
||||
p = mp_count_bits(n);
|
||||
top:
|
||||
/* q = a/2**p, a = a mod 2**p */
|
||||
if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if (k != 1) {
|
||||
/* q = q * k */
|
||||
if ((res = mp_mul_d(&q, k, &q)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
}
|
||||
|
||||
/* a = a + q */
|
||||
if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if (mp_cmp_mag(a, n) != MP_LT) {
|
||||
s_mp_sub(a, n, a);
|
||||
goto top;
|
||||
}
|
||||
|
||||
ERR:
|
||||
mp_clear(&q);
|
||||
return res;
|
||||
}
|
||||
|
42
bn_mp_reduce_2k_setup.c
Normal file
@ -0,0 +1,42 @@
|
||||
/* LibTomMath, multiple-precision integer library -- Tom St Denis
|
||||
*
|
||||
* LibTomMath is library that provides for multiple-precision
|
||||
* integer arithmetic as well as number theoretic functionality.
|
||||
*
|
||||
* The library is designed directly after the MPI library by
|
||||
* Michael Fromberger but has been written from scratch with
|
||||
* additional optimizations in place.
|
||||
*
|
||||
* The library is free for all purposes without any express
|
||||
* guarantee it works.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* determines the setup value */
|
||||
int
|
||||
mp_reduce_2k_setup(mp_int *a, mp_digit *d)
|
||||
{
|
||||
int res, p;
|
||||
mp_int tmp;
|
||||
|
||||
if ((res = mp_init(&tmp)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
|
||||
p = mp_count_bits(a);
|
||||
if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
|
||||
mp_clear(&tmp);
|
||||
return res;
|
||||
}
|
||||
|
||||
if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
|
||||
mp_clear(&tmp);
|
||||
return res;
|
||||
}
|
||||
|
||||
*d = tmp.dp[0];
|
||||
mp_clear(&tmp);
|
||||
return MP_OKAY;
|
||||
}
|
37
bn_mp_reduce_is_2k.c
Normal file
@ -0,0 +1,37 @@
|
||||
/* LibTomMath, multiple-precision integer library -- Tom St Denis
|
||||
*
|
||||
* LibTomMath is library that provides for multiple-precision
|
||||
* integer arithmetic as well as number theoretic functionality.
|
||||
*
|
||||
* The library is designed directly after the MPI library by
|
||||
* Michael Fromberger but has been written from scratch with
|
||||
* additional optimizations in place.
|
||||
*
|
||||
* The library is free for all purposes without any express
|
||||
* guarantee it works.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* determines if mp_reduce_2k can be used */
|
||||
int
|
||||
mp_reduce_is_2k(mp_int *a)
|
||||
{
|
||||
int ix, iy;
|
||||
|
||||
if (a->used == 0) {
|
||||
return 0;
|
||||
} else if (a->used == 1) {
|
||||
return 1;
|
||||
} else if (a->used > 1) {
|
||||
iy = mp_count_bits(a);
|
||||
for (ix = DIGIT_BIT; ix < iy; ix++) {
|
||||
if ((a->dp[ix/DIGIT_BIT] & ((mp_digit)1 << (mp_digit)(ix % DIGIT_BIT))) == 0) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
29
bn_mp_reduce_setup.c
Normal file
@ -0,0 +1,29 @@
|
||||
/* LibTomMath, multiple-precision integer library -- Tom St Denis
|
||||
*
|
||||
* LibTomMath is library that provides for multiple-precision
|
||||
* integer arithmetic as well as number theoretic functionality.
|
||||
*
|
||||
* The library is designed directly after the MPI library by
|
||||
* Michael Fromberger but has been written from scratch with
|
||||
* additional optimizations in place.
|
||||
*
|
||||
* The library is free for all purposes without any express
|
||||
* guarantee it works.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* pre-calculate the value required for Barrett reduction
|
||||
* For a given modulus "b" it calulates the value required in "a"
|
||||
*/
|
||||
int
|
||||
mp_reduce_setup (mp_int * a, mp_int * b)
|
||||
{
|
||||
int res;
|
||||
|
||||
if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
return mp_div (a, b, a, NULL);
|
||||
}
|
18
bn_mp_rshd.c
@ -32,15 +32,15 @@ mp_rshd (mp_int * a, int b)
|
||||
}
|
||||
|
||||
{
|
||||
register mp_digit *tmpa, *tmpaa;
|
||||
register mp_digit *bottom, *top;
|
||||
|
||||
/* shift the digits down */
|
||||
|
||||
/* base */
|
||||
tmpa = a->dp;
|
||||
/* bottom */
|
||||
bottom = a->dp;
|
||||
|
||||
/* offset into digits */
|
||||
tmpaa = a->dp + b;
|
||||
/* top [offset into digits] */
|
||||
top = a->dp + b;
|
||||
|
||||
/* this is implemented as a sliding window where
|
||||
* the window is b-digits long and digits from
|
||||
@ -53,13 +53,15 @@ mp_rshd (mp_int * a, int b)
|
||||
\-------------------/ ---->
|
||||
*/
|
||||
for (x = 0; x < (a->used - b); x++) {
|
||||
*tmpa++ = *tmpaa++;
|
||||
*bottom++ = *top++;
|
||||
}
|
||||
|
||||
/* zero the top digits */
|
||||
for (; x < a->used; x++) {
|
||||
*tmpa++ = 0;
|
||||
*bottom++ = 0;
|
||||
}
|
||||
}
|
||||
mp_clamp (a);
|
||||
|
||||
/* remove excess digits */
|
||||
a->used -= b;
|
||||
}
|
||||
|
@ -35,7 +35,7 @@ mp_set_int (mp_int * a, unsigned int b)
|
||||
b <<= 4;
|
||||
|
||||
/* ensure that digits are not clamped off */
|
||||
a->used += 32 / DIGIT_BIT + 2;
|
||||
a->used += 1;
|
||||
}
|
||||
mp_clamp (a);
|
||||
return MP_OKAY;
|
||||
|
@ -19,12 +19,16 @@ int
|
||||
mp_sqr (mp_int * a, mp_int * b)
|
||||
{
|
||||
int res;
|
||||
if (a->used > KARATSUBA_SQR_CUTOFF) {
|
||||
if (a->used >= TOOM_SQR_CUTOFF) {
|
||||
res = mp_toom_sqr(a, b);
|
||||
} else if (a->used >= KARATSUBA_SQR_CUTOFF) {
|
||||
res = mp_karatsuba_sqr (a, b);
|
||||
} else {
|
||||
|
||||
/* can we use the fast multiplier? */
|
||||
if ((a->used * 2 + 1) < 512 && a->used < (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
|
||||
if ((a->used * 2 + 1) < MP_WARRAY &&
|
||||
a->used <
|
||||
(1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
|
||||
res = fast_s_mp_sqr (a, b);
|
||||
} else {
|
||||
res = s_mp_sqr (a, b);
|
||||
|
268
bn_mp_toom_mul.c
Normal file
@ -0,0 +1,268 @@
|
||||
/* LibTomMath, multiple-precision integer library -- Tom St Denis
|
||||
*
|
||||
* LibTomMath is library that provides for multiple-precision
|
||||
* integer arithmetic as well as number theoretic functionality.
|
||||
*
|
||||
* The library is designed directly after the MPI library by
|
||||
* Michael Fromberger but has been written from scratch with
|
||||
* additional optimizations in place.
|
||||
*
|
||||
* The library is free for all purposes without any express
|
||||
* guarantee it works.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* multiplication using Toom-Cook 3-way algorithm */
|
||||
int
|
||||
mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
|
||||
{
|
||||
mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
|
||||
int res, B;
|
||||
|
||||
/* init temps */
|
||||
if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &b0, &b1, &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
|
||||
/* B */
|
||||
B = MIN(a->used, b->used) / 3;
|
||||
|
||||
/* a = a2 * B^2 + a1 * B + a0 */
|
||||
if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_copy(a, &a1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
mp_rshd(&a1, B);
|
||||
mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
|
||||
|
||||
if ((res = mp_copy(a, &a2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
mp_rshd(&a2, B*2);
|
||||
|
||||
/* b = b2 * B^2 + b1 * B + b0 */
|
||||
if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_copy(b, &b1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
mp_rshd(&b1, B);
|
||||
mp_mod_2d(&b1, DIGIT_BIT * B, &b1);
|
||||
|
||||
if ((res = mp_copy(b, &b2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
mp_rshd(&b2, B*2);
|
||||
|
||||
/* w0 = a0*b0 */
|
||||
if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
/* w4 = a2 * b2 */
|
||||
if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
/* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
|
||||
if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
/* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
|
||||
if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
|
||||
/* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
|
||||
if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
/* now solve the matrix
|
||||
|
||||
0 0 0 0 1
|
||||
1 2 4 8 16
|
||||
1 1 1 1 1
|
||||
16 8 4 2 1
|
||||
1 0 0 0 0
|
||||
|
||||
using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication
|
||||
*/
|
||||
|
||||
/* r1 - r4 */
|
||||
if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r3 - r0 */
|
||||
if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r1/2 */
|
||||
if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r3/2 */
|
||||
if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r2 - r0 - r4 */
|
||||
if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r1 - r2 */
|
||||
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r3 - r2 */
|
||||
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r1 - 8r0 */
|
||||
if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r3 - 8r4 */
|
||||
if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* 3r2 - r1 - r3 */
|
||||
if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r1 - r2 */
|
||||
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r3 - r2 */
|
||||
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r1/3 */
|
||||
if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r3/3 */
|
||||
if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
/* at this point shift W[n] by B*n */
|
||||
if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
ERR:
|
||||
mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &b0, &b1, &b2, &tmp1, &tmp2, NULL);
|
||||
return res;
|
||||
}
|
||||
|
220
bn_mp_toom_sqr.c
Normal file
@ -0,0 +1,220 @@
|
||||
/* LibTomMath, multiple-precision integer library -- Tom St Denis
|
||||
*
|
||||
* LibTomMath is library that provides for multiple-precision
|
||||
* integer arithmetic as well as number theoretic functionality.
|
||||
*
|
||||
* The library is designed directly after the MPI library by
|
||||
* Michael Fromberger but has been written from scratch with
|
||||
* additional optimizations in place.
|
||||
*
|
||||
* The library is free for all purposes without any express
|
||||
* guarantee it works.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* squaring using Toom-Cook 3-way algorithm */
|
||||
int
|
||||
mp_toom_sqr(mp_int *a, mp_int *b)
|
||||
{
|
||||
mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2;
|
||||
int res, B;
|
||||
|
||||
/* init temps */
|
||||
if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
|
||||
/* B */
|
||||
B = a->used / 3;
|
||||
|
||||
/* a = a2 * B^2 + a1 * B + a0 */
|
||||
if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_copy(a, &a1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
mp_rshd(&a1, B);
|
||||
mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
|
||||
|
||||
if ((res = mp_copy(a, &a2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
mp_rshd(&a2, B*2);
|
||||
|
||||
/* w0 = a0*a0 */
|
||||
if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
/* w4 = a2 * a2 */
|
||||
if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
/* w1 = (a2 + 2(a1 + 2a0))**2 */
|
||||
if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
/* w3 = (a0 + 2(a1 + 2a2))**2 */
|
||||
if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
|
||||
/* w2 = (a2 + a1 + a0)**2 */
|
||||
if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
/* now solve the matrix
|
||||
|
||||
0 0 0 0 1
|
||||
1 2 4 8 16
|
||||
1 1 1 1 1
|
||||
16 8 4 2 1
|
||||
1 0 0 0 0
|
||||
|
||||
using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication.
|
||||
*/
|
||||
|
||||
/* r1 - r4 */
|
||||
if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r3 - r0 */
|
||||
if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r1/2 */
|
||||
if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r3/2 */
|
||||
if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r2 - r0 - r4 */
|
||||
if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r1 - r2 */
|
||||
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r3 - r2 */
|
||||
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r1 - 8r0 */
|
||||
if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r3 - 8r4 */
|
||||
if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* 3r2 - r1 - r3 */
|
||||
if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r1 - r2 */
|
||||
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r3 - r2 */
|
||||
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r1/3 */
|
||||
if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
/* r3/3 */
|
||||
if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
/* at this point shift W[n] by B*n */
|
||||
if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
ERR:
|
||||
mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL);
|
||||
return res;
|
||||
}
|
||||
|
@ -45,7 +45,6 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
|
||||
olduse = c->used;
|
||||
c->used = max + 1;
|
||||
|
||||
/* set the carry to zero */
|
||||
{
|
||||
register mp_digit u, *tmpa, *tmpb, *tmpc;
|
||||
register int i;
|
||||
|
211
bn_s_mp_exptmod.c
Normal file
@ -0,0 +1,211 @@
|
||||
/* LibTomMath, multiple-precision integer library -- Tom St Denis
|
||||
*
|
||||
* LibTomMath is library that provides for multiple-precision
|
||||
* integer arithmetic as well as number theoretic functionality.
|
||||
*
|
||||
* The library is designed directly after the MPI library by
|
||||
* Michael Fromberger but has been written from scratch with
|
||||
* additional optimizations in place.
|
||||
*
|
||||
* The library is free for all purposes without any express
|
||||
* guarantee it works.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
int
|
||||
s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
|
||||
{
|
||||
mp_int M[256], res, mu;
|
||||
mp_digit buf;
|
||||
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
|
||||
|
||||
/* find window size */
|
||||
x = mp_count_bits (X);
|
||||
if (x <= 7) {
|
||||
winsize = 2;
|
||||
} else if (x <= 36) {
|
||||
winsize = 3;
|
||||
} else if (x <= 140) {
|
||||
winsize = 4;
|
||||
} else if (x <= 450) {
|
||||
winsize = 5;
|
||||
} else if (x <= 1303) {
|
||||
winsize = 6;
|
||||
} else if (x <= 3529) {
|
||||
winsize = 7;
|
||||
} else {
|
||||
winsize = 8;
|
||||
}
|
||||
|
||||
#ifdef MP_LOW_MEM
|
||||
if (winsize > 5) {
|
||||
winsize = 5;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* init M array */
|
||||
for (x = 0; x < (1 << winsize); x++) {
|
||||
if ((err = mp_init_size (&M[x], 1)) != MP_OKAY) {
|
||||
for (y = 0; y < x; y++) {
|
||||
mp_clear (&M[y]);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
/* create mu, used for Barrett reduction */
|
||||
if ((err = mp_init (&mu)) != MP_OKAY) {
|
||||
goto __M;
|
||||
}
|
||||
if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
|
||||
/* create M table
|
||||
*
|
||||
* The M table contains powers of the input base, e.g. M[x] = G**x mod P
|
||||
*
|
||||
* The first half of the table is not computed though accept for M[0] and M[1]
|
||||
*/
|
||||
if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
|
||||
/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
|
||||
if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
|
||||
for (x = 0; x < (winsize - 1); x++) {
|
||||
if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
}
|
||||
|
||||
/* create upper table */
|
||||
for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
|
||||
if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
}
|
||||
|
||||
/* setup result */
|
||||
if ((err = mp_init (&res)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
mp_set (&res, 1);
|
||||
|
||||
/* set initial mode and bit cnt */
|
||||
mode = 0;
|
||||
bitcnt = 1;
|
||||
buf = 0;
|
||||
digidx = X->used - 1;
|
||||
bitcpy = 0;
|
||||
bitbuf = 0;
|
||||
|
||||
for (;;) {
|
||||
/* grab next digit as required */
|
||||
if (--bitcnt == 0) {
|
||||
if (digidx == -1) {
|
||||
break;
|
||||
}
|
||||
buf = X->dp[digidx--];
|
||||
bitcnt = (int) DIGIT_BIT;
|
||||
}
|
||||
|
||||
/* grab the next msb from the exponent */
|
||||
y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
|
||||
buf <<= (mp_digit)1;
|
||||
|
||||
/* if the bit is zero and mode == 0 then we ignore it
|
||||
* These represent the leading zero bits before the first 1 bit
|
||||
* in the exponent. Technically this opt is not required but it
|
||||
* does lower the # of trivial squaring/reductions used
|
||||
*/
|
||||
if (mode == 0 && y == 0)
|
||||
continue;
|
||||
|
||||
/* if the bit is zero and mode == 1 then we square */
|
||||
if (mode == 1 && y == 0) {
|
||||
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* else we add it to the window */
|
||||
bitbuf |= (y << (winsize - ++bitcpy));
|
||||
mode = 2;
|
||||
|
||||
if (bitcpy == winsize) {
|
||||
/* ok window is filled so square as required and multiply */
|
||||
/* square first */
|
||||
for (x = 0; x < winsize; x++) {
|
||||
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
}
|
||||
|
||||
/* then multiply */
|
||||
if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
|
||||
/* empty window and reset */
|
||||
bitcpy = 0;
|
||||
bitbuf = 0;
|
||||
mode = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* if bits remain then square/multiply */
|
||||
if (mode == 2 && bitcpy > 0) {
|
||||
/* square then multiply if the bit is set */
|
||||
for (x = 0; x < bitcpy; x++) {
|
||||
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
|
||||
bitbuf <<= 1;
|
||||
if ((bitbuf & (1 << winsize)) != 0) {
|
||||
/* then multiply */
|
||||
if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mp_exch (&res, Y);
|
||||
err = MP_OKAY;
|
||||
__RES:mp_clear (&res);
|
||||
__MU:mp_clear (&mu);
|
||||
__M:
|
||||
for (x = 0; x < (1 << winsize); x++) {
|
||||
mp_clear (&M[x]);
|
||||
}
|
||||
return err;
|
||||
}
|
@ -20,8 +20,8 @@ s_mp_sqr (mp_int * a, mp_int * b)
|
||||
{
|
||||
mp_int t;
|
||||
int res, ix, iy, pa;
|
||||
mp_word r, u;
|
||||
mp_digit tmpx, *tmpt;
|
||||
mp_word r;
|
||||
mp_digit u, tmpx, *tmpt;
|
||||
|
||||
pa = a->used;
|
||||
if ((res = mp_init_size (&t, pa + pa + 1)) != MP_OKAY) {
|
||||
@ -32,7 +32,8 @@ s_mp_sqr (mp_int * a, mp_int * b)
|
||||
for (ix = 0; ix < pa; ix++) {
|
||||
/* first calculate the digit at 2*ix */
|
||||
/* calculate double precision result */
|
||||
r = ((mp_word) t.dp[ix + ix]) + ((mp_word) a->dp[ix]) * ((mp_word) a->dp[ix]);
|
||||
r = ((mp_word) t.dp[ix + ix]) +
|
||||
((mp_word) a->dp[ix]) * ((mp_word) a->dp[ix]);
|
||||
|
||||
/* store lower part in result */
|
||||
t.dp[ix + ix] = (mp_digit) (r & ((mp_word) MP_MASK));
|
||||
@ -44,7 +45,8 @@ s_mp_sqr (mp_int * a, mp_int * b)
|
||||
tmpx = a->dp[ix];
|
||||
|
||||
/* alias for where to store the results */
|
||||
tmpt = &(t.dp[ix + ix + 1]);
|
||||
tmpt = t.dp + (ix + ix + 1);
|
||||
|
||||
for (iy = ix + 1; iy < pa; iy++) {
|
||||
/* first calculate the product */
|
||||
r = ((mp_word) tmpx) * ((mp_word) a->dp[iy]);
|
||||
@ -60,13 +62,9 @@ s_mp_sqr (mp_int * a, mp_int * b)
|
||||
/* get carry */
|
||||
u = (r >> ((mp_word) DIGIT_BIT));
|
||||
}
|
||||
r = ((mp_word) * tmpt) + u;
|
||||
*tmpt = (mp_digit) (r & ((mp_word) MP_MASK));
|
||||
u = (r >> ((mp_word) DIGIT_BIT));
|
||||
/* propagate upwards */
|
||||
++tmpt;
|
||||
while (u != ((mp_word) 0)) {
|
||||
r = ((mp_word) * tmpt) + ((mp_word) 1);
|
||||
while (u != ((mp_digit) 0)) {
|
||||
r = ((mp_word) * tmpt) + ((mp_word) u);
|
||||
*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
|
||||
u = (r >> ((mp_word) DIGIT_BIT));
|
||||
}
|
||||
|
@ -33,7 +33,6 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
|
||||
olduse = c->used;
|
||||
c->used = max;
|
||||
|
||||
/* sub digits from lower part */
|
||||
{
|
||||
register mp_digit u, *tmpa, *tmpb, *tmpc;
|
||||
register int i;
|
||||
@ -52,7 +51,7 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
|
||||
/* U = carry bit of T[i]
|
||||
* Note this saves performing an AND operation since
|
||||
* if a carry does occur it will propagate all the way to the
|
||||
* MSB. As a result a single shift is required to get the carry
|
||||
* MSB. As a result a single shift is enough to get the carry
|
||||
*/
|
||||
u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
|
||||
|
||||
@ -81,3 +80,4 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
|
||||
mp_clamp (c);
|
||||
return MP_OKAY;
|
||||
}
|
||||
|
||||
|
7
bncore.c
@ -18,11 +18,14 @@
|
||||
|
||||
CPU /Compiler /MUL CUTOFF/SQR CUTOFF
|
||||
-------------------------------------------------------------
|
||||
Intel P4 /GCC v3.2 / 81/ 110
|
||||
Intel P4 /GCC v3.2 / 70/ 108
|
||||
AMD Athlon XP /GCC v3.2 / 109/ 127
|
||||
|
||||
*/
|
||||
|
||||
/* configured for a AMD XP Thoroughbred core with etc/tune.c */
|
||||
int KARATSUBA_MUL_CUTOFF = 109, /* Min. number of digits before Karatsuba multiplication is used. */
|
||||
KARATSUBA_SQR_CUTOFF = 127; /* Min. number of digits before Karatsuba squaring is used. */
|
||||
KARATSUBA_SQR_CUTOFF = 127, /* Min. number of digits before Karatsuba squaring is used. */
|
||||
|
||||
TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */
|
||||
TOOM_SQR_CUTOFF = 400;
|
||||
|
12
changes.txt
@ -1,3 +1,15 @@
|
||||
May 29th, 2003
|
||||
v0.18 -- Fixed a bug in s_mp_sqr which would handle carries properly just not very elegantly.
|
||||
(e.g. correct result, just bad looking code)
|
||||
-- Fixed bug in mp_sqr which still had a 512 constant instead of MP_WARRAY
|
||||
-- Added Toom-Cook multipliers [needs tuning!]
|
||||
-- Added efficient divide by 3 algorithm mp_div_3
|
||||
-- Re-wrote mp_div_d to be faster than calling mp_div
|
||||
-- Added in a donated BCC makefile and a single page LTM poster (ahalhabsi@sbcglobal.net)
|
||||
-- Added mp_reduce_2k which reduces an input modulo n = 2**p - k for any single digit k
|
||||
-- Made the exptmod system be aware of the 2k reduction algorithms.
|
||||
-- Rewrote mp_dr_reduce to be smaller, simpler and easier to understand.
|
||||
|
||||
May 17th, 2003
|
||||
v0.17 -- Benjamin Goldberg submitted optimized mp_add and mp_sub routines. A new gen.pl as well
|
||||
as several smaller suggestions. Thanks!
|
||||
|
72
demo/demo.c
@ -53,7 +53,7 @@ int main(void)
|
||||
#ifdef TIMER
|
||||
int n;
|
||||
ulong64 tt;
|
||||
FILE *log, *logb;
|
||||
FILE *log, *logb, *logc;
|
||||
#endif
|
||||
|
||||
mp_init(&a);
|
||||
@ -62,11 +62,54 @@ int main(void)
|
||||
mp_init(&d);
|
||||
mp_init(&e);
|
||||
mp_init(&f);
|
||||
|
||||
srand(time(NULL));
|
||||
/* test mp_reduce_2k */
|
||||
#if 0
|
||||
for (cnt = 3; cnt <= 4096; ++cnt) {
|
||||
mp_digit tmp;
|
||||
mp_2expt(&a, cnt);
|
||||
mp_sub_d(&a, 1, &a); /* a = 2**cnt - 1 */
|
||||
|
||||
|
||||
printf("\nTesting %4d bits", cnt);
|
||||
printf("(%d)", mp_reduce_is_2k(&a));
|
||||
mp_reduce_2k_setup(&a, &tmp);
|
||||
printf("(%d)", tmp);
|
||||
for (ix = 0; ix < 100000; ix++) {
|
||||
if (!(ix & 1023)) {printf("."); fflush(stdout); }
|
||||
mp_rand(&b, (cnt/DIGIT_BIT + 1) * 2);
|
||||
mp_copy(&c, &b);
|
||||
mp_mod(&c, &a, &c);
|
||||
mp_reduce_2k(&b, &a, 1);
|
||||
if (mp_cmp(&c, &b)) {
|
||||
printf("FAILED\n");
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* test mp_div_3 */
|
||||
#if 0
|
||||
for (cnt = 0; cnt < 1000000; ) {
|
||||
mp_digit r1, r2;
|
||||
|
||||
if (!(++cnt & 127)) printf("%9d\r", cnt);
|
||||
mp_rand(&a, abs(rand()) % 32 + 1);
|
||||
mp_div_d(&a, 3, &b, &r1);
|
||||
mp_div_3(&a, &c, &r2);
|
||||
|
||||
if (mp_cmp(&b, &c) || r1 != r2) {
|
||||
printf("Failure\n");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* test the DR reduction */
|
||||
#if 0
|
||||
|
||||
srand(time(NULL));
|
||||
for (cnt = 2; cnt < 32; cnt++) {
|
||||
printf("%d digit modulus\n", cnt);
|
||||
mp_grow(&a, cnt);
|
||||
@ -91,6 +134,7 @@ int main(void)
|
||||
|
||||
if (mp_cmp(&b, &c) != MP_EQ) {
|
||||
printf("Failed on trial %lu\n", rr); exit(-1);
|
||||
|
||||
}
|
||||
} while (++rr < 1000000);
|
||||
printf("Passed DR test for %d digits\n", cnt);
|
||||
@ -98,6 +142,9 @@ int main(void)
|
||||
#endif
|
||||
|
||||
#ifdef TIMER
|
||||
/* temp. turn off TOOM */
|
||||
TOOM_MUL_CUTOFF = TOOM_SQR_CUTOFF = 100000;
|
||||
|
||||
printf("CLOCKS_PER_SEC == %lu\n", CLOCKS_PER_SEC);
|
||||
|
||||
log = fopen("logs/add.log", "w");
|
||||
@ -172,9 +219,16 @@ int main(void)
|
||||
}
|
||||
fclose(log);
|
||||
}
|
||||
|
||||
{
|
||||
{
|
||||
char *primes[] = {
|
||||
/* 2K moduli mersenne primes */
|
||||
"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151",
|
||||
"531137992816767098689588206552468627329593117727031923199444138200403559860852242739162502265229285668889329486246501015346579337652707239409519978766587351943831270835393219031728127",
|
||||
"10407932194664399081925240327364085538615262247266704805319112350403608059673360298012239441732324184842421613954281007791383566248323464908139906605677320762924129509389220345773183349661583550472959420547689811211693677147548478866962501384438260291732348885311160828538416585028255604666224831890918801847068222203140521026698435488732958028878050869736186900714720710555703168729087",
|
||||
"1475979915214180235084898622737381736312066145333169775147771216478570297878078949377407337049389289382748507531496480477281264838760259191814463365330269540496961201113430156902396093989090226259326935025281409614983499388222831448598601834318536230923772641390209490231836446899608210795482963763094236630945410832793769905399982457186322944729636418890623372171723742105636440368218459649632948538696905872650486914434637457507280441823676813517852099348660847172579408422316678097670224011990280170474894487426924742108823536808485072502240519452587542875349976558572670229633962575212637477897785501552646522609988869914013540483809865681250419497686697771007",
|
||||
"259117086013202627776246767922441530941818887553125427303974923161874019266586362086201209516800483406550695241733194177441689509238807017410377709597512042313066624082916353517952311186154862265604547691127595848775610568757931191017711408826252153849035830401185072116424747461823031471398340229288074545677907941037288235820705892351068433882986888616658650280927692080339605869308790500409503709875902119018371991620994002568935113136548829739112656797303241986517250116412703509705427773477972349821676443446668383119322540099648994051790241624056519054483690809616061625743042361721863339415852426431208737266591962061753535748892894599629195183082621860853400937932839420261866586142503251450773096274235376822938649407127700846077124211823080804139298087057504713825264571448379371125032081826126566649084251699453951887789613650248405739378594599444335231188280123660406262468609212150349937584782292237144339628858485938215738821232393687046160677362909315071",
|
||||
"190797007524439073807468042969529173669356994749940177394741882673528979787005053706368049835514900244303495954950709725762186311224148828811920216904542206960744666169364221195289538436845390250168663932838805192055137154390912666527533007309292687539092257043362517857366624699975402375462954490293259233303137330643531556539739921926201438606439020075174723029056838272505051571967594608350063404495977660656269020823960825567012344189908927956646011998057988548630107637380993519826582389781888135705408653045219655801758081251164080554609057468028203308718724654081055323215860189611391296030471108443146745671967766308925858547271507311563765171008318248647110097614890313562856541784154881743146033909602737947385055355960331855614540900081456378659068370317267696980001187750995491090350108417050917991562167972281070161305972518044872048331306383715094854938415738549894606070722584737978176686422134354526989443028353644037187375385397838259511833166416134323695660367676897722287918773420968982326089026150031515424165462111337527431154890666327374921446276833564519776797633875503548665093914556482031482248883127023777039667707976559857333357013727342079099064400455741830654320379350833236245819348824064783585692924881021978332974949906122664421376034687815350484991",
|
||||
|
||||
/* DR moduli */
|
||||
"14059105607947488696282932836518693308967803494693489478439861164411992439598399594747002144074658928593502845729752797260025831423419686528151609940203368612079",
|
||||
"101745825697019260773923519755878567461315282017759829107608914364075275235254395622580447400994175578963163918967182013639660669771108475957692810857098847138903161308502419410142185759152435680068435915159402496058513611411688900243039",
|
||||
@ -196,6 +250,7 @@ int main(void)
|
||||
};
|
||||
log = fopen("logs/expt.log", "w");
|
||||
logb = fopen("logs/expt_dr.log", "w");
|
||||
logc = fopen("logs/expt_2k.log", "w");
|
||||
for (n = 0; primes[n]; n++) {
|
||||
mp_read_radix(&a, primes[n], 10);
|
||||
mp_zero(&b);
|
||||
@ -224,11 +279,12 @@ int main(void)
|
||||
exit(0);
|
||||
}
|
||||
printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt, tt);
|
||||
fprintf((n < 7) ? logb : log, "%d %9llu\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
|
||||
fprintf((n < 6) ? logc : (n < 13) ? logb : log, "%d %9llu\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
|
||||
}
|
||||
}
|
||||
fclose(log);
|
||||
fclose(logb);
|
||||
fclose(logc);
|
||||
|
||||
log = fopen("logs/invmod.log", "w");
|
||||
for (cnt = 4; cnt <= 128; cnt += 4) {
|
||||
@ -263,6 +319,12 @@ int main(void)
|
||||
|
||||
div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n =
|
||||
sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = 0;
|
||||
|
||||
/* force KARA and TOOM to enable despite cutoffs */
|
||||
KARATSUBA_SQR_CUTOFF = KARATSUBA_MUL_CUTOFF = 110;
|
||||
TOOM_SQR_CUTOFF = TOOM_MUL_CUTOFF = 150;
|
||||
|
||||
|
||||
|
||||
for (;;) {
|
||||
/* randomly clear and re-init one variable, this has the affect of triming the alloc space */
|
||||
|
2
etc/2kprime.1
Normal file
@ -0,0 +1,2 @@
|
||||
256-bits (k = 36113) = 115792089237316195423570985008687907853269984665640564039457584007913129603823
|
||||
512-bits (k = 38117) = 13407807929942597099574024998205846127479365820592393377723561443721764030073546976801874298166903427690031858186486050853753882811946569946433649006045979
|
80
etc/2kprime.c
Normal file
@ -0,0 +1,80 @@
|
||||
/* Makes safe primes of a 2k nature */
|
||||
#include <tommath.h>
|
||||
#include <time.h>
|
||||
|
||||
int sizes[] = {256, 512, 768, 1024, 1536, 2048, 3072, 4096};
|
||||
|
||||
int main(void)
|
||||
{
|
||||
char buf[2000];
|
||||
int x, y, t;
|
||||
mp_int q, p;
|
||||
FILE *out;
|
||||
clock_t t1;
|
||||
mp_digit z;
|
||||
|
||||
mp_init_multi(&q, &p, NULL);
|
||||
|
||||
out = fopen("2kprime.1", "w");
|
||||
for (x = 0; x < (int)(sizeof(sizes) / sizeof(sizes[0])); x++) {
|
||||
top:
|
||||
mp_2expt(&q, sizes[x]);
|
||||
mp_add_d(&q, 3, &q);
|
||||
z = -3;
|
||||
|
||||
t1 = clock();
|
||||
for(;;) {
|
||||
mp_sub_d(&q, 4, &q);
|
||||
z += 4;
|
||||
|
||||
if (z > MP_MASK) {
|
||||
printf("No primes of size %d found\n", sizes[x]);
|
||||
break;
|
||||
}
|
||||
|
||||
if (clock() - t1 > CLOCKS_PER_SEC) {
|
||||
printf("."); fflush(stdout);
|
||||
// sleep((clock() - t1 + CLOCKS_PER_SEC/2)/CLOCKS_PER_SEC);
|
||||
t1 = clock();
|
||||
}
|
||||
|
||||
/* quick test on q */
|
||||
mp_prime_is_prime(&q, 1, &y);
|
||||
if (y == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* find (q-1)/2 */
|
||||
mp_sub_d(&q, 1, &p);
|
||||
mp_div_2(&p, &p);
|
||||
mp_prime_is_prime(&p, 3, &y);
|
||||
if (y == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* test on q */
|
||||
mp_prime_is_prime(&q, 3, &y);
|
||||
if (y == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (y == 0) {
|
||||
++sizes[x];
|
||||
goto top;
|
||||
}
|
||||
|
||||
mp_toradix(&q, buf, 10);
|
||||
printf("\n\n%d-bits (k = %lu) = %s\n", sizes[x], z, buf);
|
||||
fprintf(out, "%d-bits (k = %lu) = %s\n", sizes[x], z, buf); fflush(out);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -32,9 +32,13 @@ mersenne: mersenne.o
|
||||
drprime: drprime.o
|
||||
$(CC) drprime.o $(LIBNAME) -o drprime
|
||||
|
||||
# fines 2k safe primes for the given config
|
||||
2kprime: 2kprime.o
|
||||
$(CC) 2kprime.o $(LIBNAME) -o 2kprime
|
||||
|
||||
mont: mont.o
|
||||
$(CC) mont.o $(LIBNAME) -o mont
|
||||
|
||||
|
||||
clean:
|
||||
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont
|
||||
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime
|
@ -14,4 +14,7 @@ tune: tune.obj
|
||||
cl tune.obj ../tommath.lib
|
||||
|
||||
drprime: drprime.obj
|
||||
cl drprime.obj ../tommath.lib
|
||||
cl drprime.obj ../tommath.lib
|
||||
|
||||
2kprime: 2kprime.obj
|
||||
cl 2kprime.obj ../tommath.lib
|
||||
|
@ -8,10 +8,9 @@
|
||||
int
|
||||
is_mersenne (long s, int *pp)
|
||||
{
|
||||
mp_int n, u, mu;
|
||||
mp_int n, u;
|
||||
int res, k;
|
||||
long ss;
|
||||
|
||||
|
||||
*pp = 0;
|
||||
|
||||
if ((res = mp_init (&n)) != MP_OKAY) {
|
||||
@ -22,27 +21,14 @@ is_mersenne (long s, int *pp)
|
||||
goto __N;
|
||||
}
|
||||
|
||||
if ((res = mp_init (&mu)) != MP_OKAY) {
|
||||
goto __U;
|
||||
}
|
||||
|
||||
/* n = 2^s - 1 */
|
||||
mp_set (&n, 1);
|
||||
ss = s;
|
||||
while (ss--) {
|
||||
if ((res = mp_mul_2 (&n, &n)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
if ((res = mp_2expt(&n, s)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
if ((res = mp_sub_d (&n, 1, &n)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
|
||||
/* setup mu */
|
||||
if ((res = mp_reduce_setup (&mu, &n)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
|
||||
/* set u=4 */
|
||||
mp_set (&u, 4);
|
||||
|
||||
@ -57,26 +43,26 @@ is_mersenne (long s, int *pp)
|
||||
}
|
||||
|
||||
/* make sure u is positive */
|
||||
if (u.sign == MP_NEG) {
|
||||
while (u.sign == MP_NEG) {
|
||||
if ((res = mp_add (&u, &n, &u)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
goto __MU;
|
||||
}
|
||||
}
|
||||
|
||||
/* reduce */
|
||||
if ((res = mp_reduce (&u, &n, &mu)) != MP_OKAY) {
|
||||
if ((res = mp_reduce_2k (&u, &n, 1)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
}
|
||||
}
|
||||
|
||||
/* if u == 0 then its prime */
|
||||
if (mp_iszero (&u) == 1) {
|
||||
*pp = 1;
|
||||
mp_prime_is_prime(&n, 3, pp);
|
||||
if (*pp != 1) printf("FAILURE\n");
|
||||
}
|
||||
|
||||
res = MP_OKAY;
|
||||
__MU:mp_clear (&mu);
|
||||
__U:mp_clear (&u);
|
||||
__MU:mp_clear (&u);
|
||||
__N:mp_clear (&n);
|
||||
return res;
|
||||
}
|
||||
|
@ -7,10 +7,11 @@ int main(void)
|
||||
mp_digit mp;
|
||||
long x, y;
|
||||
|
||||
srand(time(NULL));
|
||||
mp_init_multi(&modulus, &R, &p, &pp, NULL);
|
||||
|
||||
/* loop through various sizes */
|
||||
for (x = 4; x < 128; x++) {
|
||||
for (x = 4; x < 256; x++) {
|
||||
printf("DIGITS == %3ld...", x); fflush(stdout);
|
||||
|
||||
/* make up the odd modulus */
|
||||
@ -22,7 +23,7 @@ int main(void)
|
||||
mp_montgomery_setup(&modulus, &mp);
|
||||
|
||||
/* now run through a bunch tests */
|
||||
for (y = 0; y < 100000; y++) {
|
||||
for (y = 0; y < 1000; y++) {
|
||||
mp_rand(&p, x/2); /* p = random */
|
||||
mp_mul(&p, &R, &pp); /* pp = R * p */
|
||||
mp_montgomery_reduce(&pp, &modulus, mp);
|
||||
|
76
etc/tune.c
@ -8,17 +8,17 @@
|
||||
#ifndef X86_TIMER
|
||||
|
||||
/* generic ISO C timer */
|
||||
unsigned long long __T;
|
||||
ulong64 __T;
|
||||
void t_start(void) { __T = clock(); }
|
||||
unsigned long long t_read(void) { return clock() - __T; }
|
||||
ulong64 t_read(void) { return clock() - __T; }
|
||||
|
||||
#else
|
||||
extern void t_start(void);
|
||||
extern unsigned long long t_read(void);
|
||||
extern ulong64 t_read(void);
|
||||
#endif
|
||||
|
||||
unsigned long long
|
||||
time_mult (void)
|
||||
ulong64
|
||||
time_mult (int max)
|
||||
{
|
||||
int x, y;
|
||||
mp_int a, b, c;
|
||||
@ -28,7 +28,7 @@ time_mult (void)
|
||||
mp_init (&c);
|
||||
|
||||
t_start();
|
||||
for (x = 32; x <= 288; x += 4) {
|
||||
for (x = 32; x <= max; x += 4) {
|
||||
mp_rand (&a, x);
|
||||
mp_rand (&b, x);
|
||||
for (y = 0; y < 100; y++) {
|
||||
@ -41,8 +41,8 @@ time_mult (void)
|
||||
return t_read();
|
||||
}
|
||||
|
||||
unsigned long long
|
||||
time_sqr (void)
|
||||
ulong64
|
||||
time_sqr (int max)
|
||||
{
|
||||
int x, y;
|
||||
mp_int a, b;
|
||||
@ -51,7 +51,7 @@ time_sqr (void)
|
||||
mp_init (&b);
|
||||
|
||||
t_start();
|
||||
for (x = 32; x <= 288; x += 4) {
|
||||
for (x = 32; x <= max; x += 4) {
|
||||
mp_rand (&a, x);
|
||||
for (y = 0; y < 100; y++) {
|
||||
mp_sqr (&a, &b);
|
||||
@ -65,45 +65,85 @@ time_sqr (void)
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
int best_mult, best_square;
|
||||
unsigned long long best, ti;
|
||||
int best_kmult, best_tmult, best_ksquare, best_tsquare;
|
||||
ulong64 best, ti;
|
||||
FILE *log;
|
||||
|
||||
best_mult = best_square = 0;
|
||||
best_kmult = best_ksquare = best_tmult = best_tsquare = 0;
|
||||
/* tune multiplication first */
|
||||
|
||||
/* effectively turn TOOM off */
|
||||
TOOM_SQR_CUTOFF = TOOM_MUL_CUTOFF = 100000;
|
||||
|
||||
log = fopen ("mult.log", "w");
|
||||
best = -1;
|
||||
for (KARATSUBA_MUL_CUTOFF = 8; KARATSUBA_MUL_CUTOFF <= 200; KARATSUBA_MUL_CUTOFF++) {
|
||||
ti = time_mult ();
|
||||
ti = time_mult (300);
|
||||
printf ("%4d : %9llu\r", KARATSUBA_MUL_CUTOFF, ti);
|
||||
fprintf (log, "%d, %llu\n", KARATSUBA_MUL_CUTOFF, ti);
|
||||
fflush (stdout);
|
||||
if (ti < best) {
|
||||
printf ("New best: %llu, %d \n", ti, KARATSUBA_MUL_CUTOFF);
|
||||
best = ti;
|
||||
best_mult = KARATSUBA_MUL_CUTOFF;
|
||||
best_kmult = KARATSUBA_MUL_CUTOFF;
|
||||
}
|
||||
}
|
||||
fclose (log);
|
||||
|
||||
/* tune squaring */
|
||||
log = fopen ("sqr.log", "w");
|
||||
best = -1;
|
||||
for (KARATSUBA_SQR_CUTOFF = 8; KARATSUBA_SQR_CUTOFF <= 200; KARATSUBA_SQR_CUTOFF++) {
|
||||
ti = time_sqr ();
|
||||
ti = time_sqr (300);
|
||||
printf ("%4d : %9llu\r", KARATSUBA_SQR_CUTOFF, ti);
|
||||
fprintf (log, "%d, %llu\n", KARATSUBA_SQR_CUTOFF, ti);
|
||||
fflush (stdout);
|
||||
if (ti < best) {
|
||||
printf ("New best: %llu, %d \n", ti, KARATSUBA_SQR_CUTOFF);
|
||||
best = ti;
|
||||
best_square = KARATSUBA_SQR_CUTOFF;
|
||||
best_ksquare = KARATSUBA_SQR_CUTOFF;
|
||||
}
|
||||
}
|
||||
fclose (log);
|
||||
|
||||
KARATSUBA_MUL_CUTOFF = best_kmult;
|
||||
KARATSUBA_SQR_CUTOFF = best_ksquare;
|
||||
|
||||
/* tune TOOM mult */
|
||||
log = fopen ("tmult.log", "w");
|
||||
best = -1;
|
||||
for (TOOM_MUL_CUTOFF = best_kmult*5; TOOM_MUL_CUTOFF <= 800; TOOM_MUL_CUTOFF++) {
|
||||
ti = time_mult (1200);
|
||||
printf ("%4d : %9llu\r", TOOM_MUL_CUTOFF, ti);
|
||||
fprintf (log, "%d, %llu\n", TOOM_MUL_CUTOFF, ti);
|
||||
fflush (stdout);
|
||||
if (ti < best) {
|
||||
printf ("New best: %llu, %d \n", ti, TOOM_MUL_CUTOFF);
|
||||
best = ti;
|
||||
best_tmult = TOOM_MUL_CUTOFF;
|
||||
}
|
||||
}
|
||||
fclose (log);
|
||||
|
||||
/* tune TOOM sqr */
|
||||
log = fopen ("tsqr.log", "w");
|
||||
best = -1;
|
||||
for (TOOM_SQR_CUTOFF = best_ksquare*3; TOOM_SQR_CUTOFF <= 800; TOOM_SQR_CUTOFF++) {
|
||||
ti = time_sqr (1200);
|
||||
printf ("%4d : %9llu\r", TOOM_SQR_CUTOFF, ti);
|
||||
fprintf (log, "%d, %llu\n", TOOM_SQR_CUTOFF, ti);
|
||||
fflush (stdout);
|
||||
if (ti < best) {
|
||||
printf ("New best: %llu, %d \n", ti, TOOM_SQR_CUTOFF);
|
||||
best = ti;
|
||||
best_tsquare = TOOM_SQR_CUTOFF;
|
||||
}
|
||||
}
|
||||
fclose (log);
|
||||
|
||||
printf
|
||||
("\n\n\nKaratsuba Multiplier Cutoff: %d\nKaratsuba Squaring Cutoff: %d\n",
|
||||
best_mult, best_square);
|
||||
("\n\n\nKaratsuba Multiplier Cutoff: %d\nKaratsuba Squaring Cutoff: %d\nToom Multiplier Cutoff: %d\nToom Squaring Cutoff: %d\n",
|
||||
best_kmult, best_ksquare, best_tmult, best_tsquare);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
4
gen.pl
@ -6,7 +6,7 @@
|
||||
use strict;
|
||||
|
||||
open( OUT, ">mpi.c" ) or die "Couldn't open mpi.c for writing: $!";
|
||||
foreach my $filename (glob "bn_*.c") {
|
||||
foreach my $filename (glob "bn*.c") {
|
||||
open( SRC, "<$filename" ) or die "Couldn't open $filename for reading: $!";
|
||||
print OUT "/* Start: $filename */\n";
|
||||
print OUT qq[#line 0 "$filename"\n];
|
||||
@ -14,5 +14,5 @@ foreach my $filename (glob "bn_*.c") {
|
||||
print OUT "\n/* End: $filename */\n\n";
|
||||
close SRC or die "Error closing $filename after reading: $!";
|
||||
}
|
||||
print OUT "\b/* EOF */\n";
|
||||
print OUT "\n/* EOF */\n";
|
||||
close OUT or die "Error closing mpi.c after writing: $!";
|
32
logs/add.log
@ -1,16 +1,16 @@
|
||||
224 11039864
|
||||
448 9206336
|
||||
672 8178200
|
||||
896 7432176
|
||||
1120 6433264
|
||||
1344 5847056
|
||||
1568 5270184
|
||||
1792 4943416
|
||||
2016 4520016
|
||||
2240 4256168
|
||||
2464 3999224
|
||||
2688 3714896
|
||||
2912 3572720
|
||||
3136 3340176
|
||||
3360 3222584
|
||||
3584 3036336
|
||||
224 11069160
|
||||
448 9156136
|
||||
672 8089755
|
||||
896 7399424
|
||||
1120 6389352
|
||||
1344 5818648
|
||||
1568 5257112
|
||||
1792 4982160
|
||||
2016 4527856
|
||||
2240 4325312
|
||||
2464 4051760
|
||||
2688 3767640
|
||||
2912 3612520
|
||||
3136 3415208
|
||||
3360 3258656
|
||||
3584 3113360
|
||||
|
BIN
logs/addsub.png
Before Width: | Height: | Size: 5.8 KiB After Width: | Height: | Size: 6.8 KiB |
@ -1,7 +1,7 @@
|
||||
14364 666
|
||||
21532 253
|
||||
28700 117
|
||||
57372 17
|
||||
71708 9
|
||||
86044 5
|
||||
114716 2
|
||||
513 680
|
||||
769 257
|
||||
1025 117
|
||||
2049 17
|
||||
2561 9
|
||||
3073 5
|
||||
4097 2
|
||||
|
BIN
logs/expt.png
Before Width: | Height: | Size: 5.5 KiB After Width: | Height: | Size: 7.2 KiB |
6
logs/expt_2k.log
Normal file
@ -0,0 +1,6 @@
|
||||
521 736
|
||||
607 552
|
||||
1279 112
|
||||
2203 33
|
||||
3217 13
|
||||
4253 6
|
@ -1,7 +1,7 @@
|
||||
14896 1088
|
||||
21952 468
|
||||
29008 244
|
||||
43120 91
|
||||
58016 43
|
||||
86240 15
|
||||
115248 6
|
||||
532 1064
|
||||
784 460
|
||||
1036 240
|
||||
1540 91
|
||||
2072 43
|
||||
3080 15
|
||||
4116 6
|
||||
|
@ -1,5 +1,5 @@
|
||||
set terminal png color
|
||||
set size 1.5
|
||||
set size 1.75
|
||||
set ylabel "Operations per Second"
|
||||
set xlabel "Operand size (bits)"
|
||||
|
||||
@ -10,7 +10,7 @@ set output "mult.png"
|
||||
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
|
||||
|
||||
set output "expt.png"
|
||||
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)"
|
||||
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)", 'expt_2k.log' smooth bezier title "Exptmod (2k Reduction)"
|
||||
|
||||
set output "invmod.png"
|
||||
plot 'invmod.log' smooth bezier title "Modular Inverse"
|
||||
|
@ -1,32 +1,32 @@
|
||||
112 15608
|
||||
224 7840
|
||||
336 5104
|
||||
448 3376
|
||||
560 2616
|
||||
672 1984
|
||||
784 1640
|
||||
896 2056
|
||||
1008 1136
|
||||
1120 936
|
||||
1232 1240
|
||||
1344 1112
|
||||
1456 608
|
||||
1568 873
|
||||
1680 492
|
||||
1792 444
|
||||
1904 640
|
||||
2016 584
|
||||
2128 328
|
||||
2240 307
|
||||
2352 283
|
||||
2464 256
|
||||
2576 393
|
||||
2688 365
|
||||
2800 344
|
||||
2912 196
|
||||
3024 301
|
||||
3136 170
|
||||
3248 160
|
||||
3360 250
|
||||
3472 144
|
||||
3584 224
|
||||
112 16248
|
||||
224 8192
|
||||
336 5320
|
||||
448 3560
|
||||
560 2728
|
||||
672 2064
|
||||
784 1704
|
||||
896 2176
|
||||
1008 1184
|
||||
1120 976
|
||||
1232 1280
|
||||
1344 1176
|
||||
1456 624
|
||||
1568 912
|
||||
1680 504
|
||||
1792 452
|
||||
1904 658
|
||||
2016 608
|
||||
2128 336
|
||||
2240 312
|
||||
2352 288
|
||||
2464 264
|
||||
2576 408
|
||||
2688 376
|
||||
2800 354
|
||||
2912 198
|
||||
3024 307
|
||||
3136 173
|
||||
3248 162
|
||||
3360 256
|
||||
3472 145
|
||||
3584 226
|
||||
|
BIN
logs/invmod.png
Before Width: | Height: | Size: 4.7 KiB After Width: | Height: | Size: 5.6 KiB |
13
logs/k7/README
Normal file
@ -0,0 +1,13 @@
|
||||
To use the pretty graphs you have to first build/run the ltmtest from the root directory of the package.
|
||||
Todo this type
|
||||
|
||||
make timing ; ltmtest
|
||||
|
||||
in the root. It will run for a while [about ten minutes on most PCs] and produce a series of .log files in logs/.
|
||||
|
||||
After doing that run "gnuplot graphs.dem" to make the PNGs. If you managed todo that all so far just open index.html to view
|
||||
them all :-)
|
||||
|
||||
Have fun
|
||||
|
||||
Tom
|
16
logs/k7/add.log
Normal file
@ -0,0 +1,16 @@
|
||||
224 11069160
|
||||
448 9156136
|
||||
672 8089755
|
||||
896 7399424
|
||||
1120 6389352
|
||||
1344 5818648
|
||||
1568 5257112
|
||||
1792 4982160
|
||||
2016 4527856
|
||||
2240 4325312
|
||||
2464 4051760
|
||||
2688 3767640
|
||||
2912 3612520
|
||||
3136 3415208
|
||||
3360 3258656
|
||||
3584 3113360
|
BIN
logs/k7/addsub.png
Normal file
After Width: | Height: | Size: 6.8 KiB |
7
logs/k7/expt.log
Normal file
@ -0,0 +1,7 @@
|
||||
513 664
|
||||
769 256
|
||||
1025 117
|
||||
2049 17
|
||||
2561 9
|
||||
3073 5
|
||||
4097 2
|
BIN
logs/k7/expt.png
Normal file
After Width: | Height: | Size: 6.3 KiB |
7
logs/k7/expt_dr.log
Normal file
@ -0,0 +1,7 @@
|
||||
532 1088
|
||||
784 460
|
||||
1036 240
|
||||
1540 92
|
||||
2072 43
|
||||
3080 15
|
||||
4116 6
|
17
logs/k7/graphs.dem
Normal file
@ -0,0 +1,17 @@
|
||||
set terminal png color
|
||||
set size 1.75
|
||||
set ylabel "Operations per Second"
|
||||
set xlabel "Operand size (bits)"
|
||||
|
||||
set output "addsub.png"
|
||||
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
|
||||
|
||||
set output "mult.png"
|
||||
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
|
||||
|
||||
set output "expt.png"
|
||||
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)"
|
||||
|
||||
set output "invmod.png"
|
||||
plot 'invmod.log' smooth bezier title "Modular Inverse"
|
||||
|
24
logs/k7/index.html
Normal file
@ -0,0 +1,24 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>LibTomMath Log Plots</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Addition and Subtraction</h1>
|
||||
<center><img src=addsub.png></center>
|
||||
<hr>
|
||||
|
||||
<h1>Multipliers</h1>
|
||||
<center><img src=mult.png></center>
|
||||
<hr>
|
||||
|
||||
<h1>Exptmod</h1>
|
||||
<center><img src=expt.png></center>
|
||||
<hr>
|
||||
|
||||
<h1>Modular Inverse</h1>
|
||||
<center><img src=invmod.png></center>
|
||||
<hr>
|
||||
|
||||
</body>
|
||||
</html>
|
32
logs/k7/invmod.log
Normal file
@ -0,0 +1,32 @@
|
||||
112 16248
|
||||
224 8192
|
||||
336 5320
|
||||
448 3560
|
||||
560 2728
|
||||
672 2064
|
||||
784 1704
|
||||
896 2176
|
||||
1008 1184
|
||||
1120 976
|
||||
1232 1280
|
||||
1344 1176
|
||||
1456 624
|
||||
1568 912
|
||||
1680 504
|
||||
1792 452
|
||||
1904 658
|
||||
2016 608
|
||||
2128 336
|
||||
2240 312
|
||||
2352 288
|
||||
2464 264
|
||||
2576 408
|
||||
2688 376
|
||||
2800 354
|
||||
2912 198
|
||||
3024 307
|
||||
3136 173
|
||||
3248 162
|
||||
3360 256
|
||||
3472 145
|
||||
3584 226
|
BIN
logs/k7/invmod.png
Normal file
After Width: | Height: | Size: 5.6 KiB |
17
logs/k7/mult.log
Normal file
@ -0,0 +1,17 @@
|
||||
896 322904
|
||||
1344 151592
|
||||
1792 90472
|
||||
2240 59984
|
||||
2688 42624
|
||||
3136 31872
|
||||
3584 24704
|
||||
4032 19704
|
||||
4480 16096
|
||||
4928 13376
|
||||
5376 11272
|
||||
5824 9616
|
||||
6272 8360
|
||||
6720 7304
|
||||
7168 1664
|
||||
7616 1472
|
||||
8064 1328
|
BIN
logs/k7/mult.png
Normal file
After Width: | Height: | Size: 8.1 KiB |
17
logs/k7/mult_kara.log
Normal file
@ -0,0 +1,17 @@
|
||||
896 322872
|
||||
1344 151688
|
||||
1792 90480
|
||||
2240 59984
|
||||
2688 42656
|
||||
3136 32144
|
||||
3584 25840
|
||||
4032 21328
|
||||
4480 17856
|
||||
4928 14928
|
||||
5376 12856
|
||||
5824 11256
|
||||
6272 9880
|
||||
6720 8984
|
||||
7168 7928
|
||||
7616 7200
|
||||
8064 6576
|
17
logs/k7/sqr.log
Normal file
@ -0,0 +1,17 @@
|
||||
896 415472
|
||||
1344 223736
|
||||
1792 141232
|
||||
2240 97624
|
||||
2688 71400
|
||||
3136 54800
|
||||
3584 16904
|
||||
4032 13528
|
||||
4480 10968
|
||||
4928 9128
|
||||
5376 7784
|
||||
5824 6672
|
||||
6272 5760
|
||||
6720 5056
|
||||
7168 4440
|
||||
7616 3952
|
||||
8064 3512
|
17
logs/k7/sqr_kara.log
Normal file
@ -0,0 +1,17 @@
|
||||
896 420464
|
||||
1344 224800
|
||||
1792 142808
|
||||
2240 97704
|
||||
2688 71416
|
||||
3136 54504
|
||||
3584 38320
|
||||
4032 32360
|
||||
4480 27576
|
||||
4928 23840
|
||||
5376 20688
|
||||
5824 18264
|
||||
6272 16176
|
||||
6720 14440
|
||||
7168 11688
|
||||
7616 10752
|
||||
8064 9936
|
16
logs/k7/sub.log
Normal file
@ -0,0 +1,16 @@
|
||||
224 9728504
|
||||
448 8573648
|
||||
672 7488096
|
||||
896 6714064
|
||||
1120 5950472
|
||||
1344 5457400
|
||||
1568 5038896
|
||||
1792 4683632
|
||||
2016 4384656
|
||||
2240 4105976
|
||||
2464 3871608
|
||||
2688 3650680
|
||||
2912 3463552
|
||||
3136 3290016
|
||||
3360 3135272
|
||||
3584 2993848
|
@ -1,17 +1,17 @@
|
||||
896 321504
|
||||
1344 150784
|
||||
1792 90288
|
||||
2240 59760
|
||||
2688 42480
|
||||
3136 32056
|
||||
3584 24600
|
||||
4032 19656
|
||||
4480 16024
|
||||
4928 13328
|
||||
5376 11280
|
||||
5824 9624
|
||||
6272 8336
|
||||
6720 7280
|
||||
7168 1648
|
||||
7616 1464
|
||||
8064 1296
|
||||
896 322904
|
||||
1344 151592
|
||||
1792 90472
|
||||
2240 59984
|
||||
2688 42624
|
||||
3136 31872
|
||||
3584 24704
|
||||
4032 19704
|
||||
4480 16096
|
||||
4928 13376
|
||||
5376 11272
|
||||
5824 9616
|
||||
6272 8360
|
||||
6720 7304
|
||||
7168 1664
|
||||
7616 1472
|
||||
8064 1328
|
||||
|
BIN
logs/mult.png
Before Width: | Height: | Size: 6.9 KiB After Width: | Height: | Size: 8.1 KiB |
@ -1,17 +1,17 @@
|
||||
896 321928
|
||||
1344 150752
|
||||
1792 90136
|
||||
2240 59888
|
||||
2688 42480
|
||||
3136 32080
|
||||
3584 25744
|
||||
4032 21216
|
||||
4480 17912
|
||||
4928 14896
|
||||
5376 12936
|
||||
5824 11216
|
||||
6272 9848
|
||||
6720 8896
|
||||
7168 7968
|
||||
7616 7248
|
||||
8064 6600
|
||||
896 322872
|
||||
1344 151688
|
||||
1792 90480
|
||||
2240 59984
|
||||
2688 42656
|
||||
3136 32144
|
||||
3584 25840
|
||||
4032 21328
|
||||
4480 17856
|
||||
4928 14928
|
||||
5376 12856
|
||||
5824 11256
|
||||
6272 9880
|
||||
6720 8984
|
||||
7168 7928
|
||||
7616 7200
|
||||
8064 6576
|
||||
|
13
logs/p4/README
Normal file
@ -0,0 +1,13 @@
|
||||
To use the pretty graphs you have to first build/run the ltmtest from the root directory of the package.
|
||||
Todo this type
|
||||
|
||||
make timing ; ltmtest
|
||||
|
||||
in the root. It will run for a while [about ten minutes on most PCs] and produce a series of .log files in logs/.
|
||||
|
||||
After doing that run "gnuplot graphs.dem" to make the PNGs. If you managed todo that all so far just open index.html to view
|
||||
them all :-)
|
||||
|
||||
Have fun
|
||||
|
||||
Tom
|
16
logs/p4/add.log
Normal file
@ -0,0 +1,16 @@
|
||||
224 8113248
|
||||
448 6585584
|
||||
672 5687678
|
||||
896 4761144
|
||||
1120 4111592
|
||||
1344 3995154
|
||||
1568 3532387
|
||||
1792 3225400
|
||||
2016 2963960
|
||||
2240 2720112
|
||||
2464 2533952
|
||||
2688 2307168
|
||||
2912 2287064
|
||||
3136 2150160
|
||||
3360 2035992
|
||||
3584 1936304
|
BIN
logs/p4/addsub.png
Normal file
After Width: | Height: | Size: 6.7 KiB |
7
logs/p4/expt.log
Normal file
@ -0,0 +1,7 @@
|
||||
513 195
|
||||
769 68
|
||||
1025 31
|
||||
2049 4
|
||||
2561 2
|
||||
3073 1
|
||||
4097 0
|
BIN
logs/p4/expt.png
Normal file
After Width: | Height: | Size: 6.4 KiB |
7
logs/p4/expt_dr.log
Normal file
@ -0,0 +1,7 @@
|
||||
532 393
|
||||
784 158
|
||||
1036 79
|
||||
1540 27
|
||||
2072 12
|
||||
3080 4
|
||||
4116 1
|
17
logs/p4/graphs.dem
Normal file
@ -0,0 +1,17 @@
|
||||
set terminal png color
|
||||
set size 1.75
|
||||
set ylabel "Operations per Second"
|
||||
set xlabel "Operand size (bits)"
|
||||
|
||||
set output "addsub.png"
|
||||
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
|
||||
|
||||
set output "mult.png"
|
||||
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
|
||||
|
||||
set output "expt.png"
|
||||
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)"
|
||||
|
||||
set output "invmod.png"
|
||||
plot 'invmod.log' smooth bezier title "Modular Inverse"
|
||||
|
24
logs/p4/index.html
Normal file
@ -0,0 +1,24 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>LibTomMath Log Plots</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Addition and Subtraction</h1>
|
||||
<center><img src=addsub.png></center>
|
||||
<hr>
|
||||
|
||||
<h1>Multipliers</h1>
|
||||
<center><img src=mult.png></center>
|
||||
<hr>
|
||||
|
||||
<h1>Exptmod</h1>
|
||||
<center><img src=expt.png></center>
|
||||
<hr>
|
||||
|
||||
<h1>Modular Inverse</h1>
|
||||
<center><img src=invmod.png></center>
|
||||
<hr>
|
||||
|
||||
</body>
|
||||
</html>
|
32
logs/p4/invmod.log
Normal file
@ -0,0 +1,32 @@
|
||||
112 13608
|
||||
224 6872
|
||||
336 4264
|
||||
448 2792
|
||||
560 2144
|
||||
672 1560
|
||||
784 1296
|
||||
896 1672
|
||||
1008 896
|
||||
1120 736
|
||||
1232 1024
|
||||
1344 888
|
||||
1456 472
|
||||
1568 680
|
||||
1680 373
|
||||
1792 328
|
||||
1904 484
|
||||
2016 436
|
||||
2128 232
|
||||
2240 211
|
||||
2352 200
|
||||
2464 177
|
||||
2576 293
|
||||
2688 262
|
||||
2800 251
|
||||
2912 137
|
||||
3024 216
|
||||
3136 117
|
||||
3248 113
|
||||
3360 181
|
||||
3472 98
|
||||
3584 158
|
BIN
logs/p4/invmod.png
Normal file
After Width: | Height: | Size: 5.5 KiB |
17
logs/p4/mult.log
Normal file
@ -0,0 +1,17 @@
|
||||
896 77600
|
||||
1344 35776
|
||||
1792 19688
|
||||
2240 13248
|
||||
2688 9424
|
||||
3136 7056
|
||||
3584 5464
|
||||
4032 4368
|
||||
4480 3568
|
||||
4928 2976
|
||||
5376 2520
|
||||
5824 2152
|
||||
6272 1872
|
||||
6720 1632
|
||||
7168 650
|
||||
7616 576
|
||||
8064 515
|
BIN
logs/p4/mult.png
Normal file
After Width: | Height: | Size: 7.9 KiB |
17
logs/p4/mult_kara.log
Normal file
@ -0,0 +1,17 @@
|
||||
896 77752
|
||||
1344 35832
|
||||
1792 19688
|
||||
2240 14704
|
||||
2688 10832
|
||||
3136 8336
|
||||
3584 6600
|
||||
4032 5424
|
||||
4480 4648
|
||||
4928 3976
|
||||
5376 3448
|
||||
5824 3016
|
||||
6272 2664
|
||||
6720 2384
|
||||
7168 2120
|
||||
7616 1912
|
||||
8064 1752
|
17
logs/p4/sqr.log
Normal file
@ -0,0 +1,17 @@
|
||||
896 128088
|
||||
1344 63640
|
||||
1792 37968
|
||||
2240 25488
|
||||
2688 18176
|
||||
3136 13672
|
||||
3584 4920
|
||||
4032 3912
|
||||
4480 3160
|
||||
4928 2616
|
||||
5376 2216
|
||||
5824 1896
|
||||
6272 1624
|
||||
6720 1408
|
||||
7168 1240
|
||||
7616 1096
|
||||
8064 984
|
17
logs/p4/sqr_kara.log
Normal file
@ -0,0 +1,17 @@
|
||||
896 127456
|
||||
1344 63752
|
||||
1792 37920
|
||||
2240 25440
|
||||
2688 18200
|
||||
3136 13728
|
||||
3584 10968
|
||||
4032 9072
|
||||
4480 7608
|
||||
4928 6440
|
||||
5376 5528
|
||||
5824 4768
|
||||
6272 4328
|
||||
6720 3888
|
||||
7168 3504
|
||||
7616 3176
|
||||
8064 2896
|
16
logs/p4/sub.log
Normal file
@ -0,0 +1,16 @@
|
||||
224 7355896
|
||||
448 6162880
|
||||
672 5218984
|
||||
896 4622776
|
||||
1120 3999320
|
||||
1344 3629480
|
||||
1568 3290384
|
||||
1792 2954752
|
||||
2016 2737056
|
||||
2240 2563320
|
||||
2464 2451928
|
||||
2688 2310920
|
||||
2912 2139048
|
||||
3136 2034080
|
||||
3360 1890800
|
||||
3584 1808624
|
34
logs/sqr.log
@ -1,17 +1,17 @@
|
||||
896 416968
|
||||
1344 223672
|
||||
1792 141552
|
||||
2240 97280
|
||||
2688 71304
|
||||
3136 54648
|
||||
3584 16264
|
||||
4032 13000
|
||||
4480 10528
|
||||
4928 8776
|
||||
5376 7464
|
||||
5824 6440
|
||||
6272 5520
|
||||
6720 4808
|
||||
7168 4264
|
||||
7616 3784
|
||||
8064 3368
|
||||
896 415472
|
||||
1344 223736
|
||||
1792 141232
|
||||
2240 97624
|
||||
2688 71400
|
||||
3136 54800
|
||||
3584 16904
|
||||
4032 13528
|
||||
4480 10968
|
||||
4928 9128
|
||||
5376 7784
|
||||
5824 6672
|
||||
6272 5760
|
||||
6720 5056
|
||||
7168 4440
|
||||
7616 3952
|
||||
8064 3512
|
||||
|
@ -1,17 +1,17 @@
|
||||
896 416656
|
||||
1344 223728
|
||||
1792 141288
|
||||
2240 97456
|
||||
2688 71152
|
||||
3136 54392
|
||||
3584 38552
|
||||
4032 32216
|
||||
4480 27384
|
||||
4928 23792
|
||||
5376 20728
|
||||
5824 18232
|
||||
6272 16160
|
||||
6720 14408
|
||||
7168 11696
|
||||
7616 10768
|
||||
8064 9920
|
||||
896 420464
|
||||
1344 224800
|
||||
1792 142808
|
||||
2240 97704
|
||||
2688 71416
|
||||
3136 54504
|
||||
3584 38320
|
||||
4032 32360
|
||||
4480 27576
|
||||
4928 23840
|
||||
5376 20688
|
||||
5824 18264
|
||||
6272 16176
|
||||
6720 14440
|
||||
7168 11688
|
||||
7616 10752
|
||||
8064 9936
|
||||
|
32
logs/sub.log
@ -1,16 +1,16 @@
|
||||
224 9862520
|
||||
448 8562344
|
||||
672 7661400
|
||||
896 6838128
|
||||
1120 5911144
|
||||
1344 5394040
|
||||
1568 4993760
|
||||
1792 4624240
|
||||
2016 4332024
|
||||
2240 4029312
|
||||
2464 3790784
|
||||
2688 3587216
|
||||
2912 3397952
|
||||
3136 3239736
|
||||
3360 3080616
|
||||
3584 2933104
|
||||
224 9728504
|
||||
448 8573648
|
||||
672 7488096
|
||||
896 6714064
|
||||
1120 5950472
|
||||
1344 5457400
|
||||
1568 5038896
|
||||
1792 4683632
|
||||
2016 4384656
|
||||
2240 4105976
|
||||
2464 3871608
|
||||
2688 3650680
|
||||
2912 3463552
|
||||
3136 3290016
|
||||
3360 3135272
|
||||
3584 2993848
|
||||
|
16
makefile
@ -1,6 +1,6 @@
|
||||
CFLAGS += -I./ -Wall -W -Wshadow -O3 -fomit-frame-pointer -funroll-loops
|
||||
|
||||
VERSION=0.17
|
||||
VERSION=0.18
|
||||
|
||||
default: libtommath.a
|
||||
|
||||
@ -33,7 +33,9 @@ bn_mp_to_signed_bin.o bn_mp_unsigned_bin_size.o bn_mp_signed_bin_size.o bn_radix
|
||||
bn_mp_xor.o bn_mp_and.o bn_mp_or.o bn_mp_rand.o bn_mp_montgomery_calc_normalization.o \
|
||||
bn_mp_prime_is_divisible.o bn_prime_tab.o bn_mp_prime_fermat.o bn_mp_prime_miller_rabin.o \
|
||||
bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o bn_mp_multi.o \
|
||||
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o
|
||||
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o bn_mp_reduce_setup.o \
|
||||
bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \
|
||||
bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o
|
||||
|
||||
libtommath.a: $(OBJECTS)
|
||||
$(AR) $(ARFLAGS) libtommath.a $(OBJECTS)
|
||||
@ -63,6 +65,11 @@ docdvi: tommath.src
|
||||
makeindex tommath
|
||||
latex tommath > /dev/null
|
||||
|
||||
# poster, makes the single page PDF poster
|
||||
poster: poster.tex
|
||||
pdflatex poster
|
||||
rm -f poster.aux poster.log
|
||||
|
||||
# makes the LTM book PS/PDF file, requires tetex, cleans up the LaTeX temp files
|
||||
docs:
|
||||
cd pics ; make pdfes
|
||||
@ -88,11 +95,12 @@ manual:
|
||||
|
||||
clean:
|
||||
rm -f *.pdf *.o *.a *.obj *.lib *.exe etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \
|
||||
tommath.idx tommath.toc tommath.log tommath.aux tommath.dvi tommath.lof tommath.ind tommath.ilg *.ps *.pdf *.log *.s mpi.c
|
||||
tommath.idx tommath.toc tommath.log tommath.aux tommath.dvi tommath.lof tommath.ind tommath.ilg *.ps *.pdf *.log *.s mpi.c \
|
||||
poster.aux poster.dvi poster.log
|
||||
cd etc ; make clean
|
||||
cd pics ; make clean
|
||||
|
||||
zipup: clean manual
|
||||
zipup: clean manual poster
|
||||
perl gen.pl ; mv mpi.c pre_gen/ ; \
|
||||
cd .. ; rm -rf ltm* libtommath-$(VERSION) ; mkdir libtommath-$(VERSION) ; \
|
||||
cp -R ./libtommath/* ./libtommath-$(VERSION)/ ; tar -c libtommath-$(VERSION)/* > ltm-$(VERSION).tar ; \
|
||||
|
37
makefile.bcc
Normal file
@ -0,0 +1,37 @@
|
||||
#
|
||||
# Borland C++Builder Makefile (makefile.bcc)
|
||||
#
|
||||
|
||||
|
||||
LIB = tlib
|
||||
CC = bcc32
|
||||
CFLAGS = -c -O2 -I.
|
||||
|
||||
OBJECTS=bncore.obj bn_mp_init.obj bn_mp_clear.obj bn_mp_exch.obj bn_mp_grow.obj bn_mp_shrink.obj \
|
||||
bn_mp_clamp.obj bn_mp_zero.obj bn_mp_set.obj bn_mp_set_int.obj bn_mp_init_size.obj bn_mp_copy.obj \
|
||||
bn_mp_init_copy.obj bn_mp_abs.obj bn_mp_neg.obj bn_mp_cmp_mag.obj bn_mp_cmp.obj bn_mp_cmp_d.obj \
|
||||
bn_mp_rshd.obj bn_mp_lshd.obj bn_mp_mod_2d.obj bn_mp_div_2d.obj bn_mp_mul_2d.obj bn_mp_div_2.obj \
|
||||
bn_mp_mul_2.obj bn_s_mp_add.obj bn_s_mp_sub.obj bn_fast_s_mp_mul_digs.obj bn_s_mp_mul_digs.obj \
|
||||
bn_fast_s_mp_mul_high_digs.obj bn_s_mp_mul_high_digs.obj bn_fast_s_mp_sqr.obj bn_s_mp_sqr.obj \
|
||||
bn_mp_add.obj bn_mp_sub.obj bn_mp_karatsuba_mul.obj bn_mp_mul.obj bn_mp_karatsuba_sqr.obj \
|
||||
bn_mp_sqr.obj bn_mp_div.obj bn_mp_mod.obj bn_mp_add_d.obj bn_mp_sub_d.obj bn_mp_mul_d.obj \
|
||||
bn_mp_div_d.obj bn_mp_mod_d.obj bn_mp_expt_d.obj bn_mp_addmod.obj bn_mp_submod.obj \
|
||||
bn_mp_mulmod.obj bn_mp_sqrmod.obj bn_mp_gcd.obj bn_mp_lcm.obj bn_fast_mp_invmod.obj bn_mp_invmod.obj \
|
||||
bn_mp_reduce.obj bn_mp_montgomery_setup.obj bn_fast_mp_montgomery_reduce.obj bn_mp_montgomery_reduce.obj \
|
||||
bn_mp_exptmod_fast.obj bn_mp_exptmod.obj bn_mp_2expt.obj bn_mp_n_root.obj bn_mp_jacobi.obj bn_reverse.obj \
|
||||
bn_mp_count_bits.obj bn_mp_read_unsigned_bin.obj bn_mp_read_signed_bin.obj bn_mp_to_unsigned_bin.obj \
|
||||
bn_mp_to_signed_bin.obj bn_mp_unsigned_bin_size.obj bn_mp_signed_bin_size.obj bn_radix.obj \
|
||||
bn_mp_xor.obj bn_mp_and.obj bn_mp_or.obj bn_mp_rand.obj bn_mp_montgomery_calc_normalization.obj \
|
||||
bn_mp_prime_is_divisible.obj bn_prime_tab.obj bn_mp_prime_fermat.obj bn_mp_prime_miller_rabin.obj \
|
||||
bn_mp_prime_is_prime.obj bn_mp_prime_next_prime.obj bn_mp_dr_reduce.obj bn_mp_multi.obj \
|
||||
bn_mp_dr_is_modulus.obj bn_mp_dr_setup.obj bn_mp_reduce_setup.obj \
|
||||
bn_mp_toom_mul.obj bn_mp_toom_sqr.obj bn_mp_div_3.obj bn_s_mp_exptmod.obj \
|
||||
bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj
|
||||
|
||||
TARGET = libtommath.lib
|
||||
|
||||
$(TARGET): $(OBJECTS)
|
||||
|
||||
.c.objbj:
|
||||
$(CC) $(CFLAGS) $<
|
||||
$(LIB) $(TARGET) -+$@
|
@ -23,7 +23,10 @@ bn_mp_to_signed_bin.obj bn_mp_unsigned_bin_size.obj bn_mp_signed_bin_size.obj bn
|
||||
bn_mp_xor.obj bn_mp_and.obj bn_mp_or.obj bn_mp_rand.obj bn_mp_montgomery_calc_normalization.obj \
|
||||
bn_mp_prime_is_divisible.obj bn_prime_tab.obj bn_mp_prime_fermat.obj bn_mp_prime_miller_rabin.obj \
|
||||
bn_mp_prime_is_prime.obj bn_mp_prime_next_prime.obj bn_mp_dr_reduce.obj bn_mp_multi.obj \
|
||||
bn_mp_dr_is_modulus.obj bn_mp_dr_setup.obj
|
||||
bn_mp_dr_is_modulus.obj bn_mp_dr_setup.obj bn_mp_reduce_setup.obj \
|
||||
bn_mp_toom_mul.obj bn_mp_toom_sqr.obj bn_mp_div_3.obj bn_s_mp_exptmod.obj \
|
||||
bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj
|
||||
|
||||
|
||||
|
||||
library: $(OBJECTS)
|
||||
|
@ -40,14 +40,10 @@ void rand_num(mp_int *a)
|
||||
int n, size;
|
||||
unsigned char buf[2048];
|
||||
|
||||
top:
|
||||
size = 1 + ((fgetc(rng)*fgetc(rng)) % 1024);
|
||||
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 1031;
|
||||
buf[0] = (fgetc(rng)&1)?1:0;
|
||||
fread(buf+1, 1, size, rng);
|
||||
for (n = 0; n < size; n++) {
|
||||
if (buf[n+1]) break;
|
||||
}
|
||||
if (n == size) goto top;
|
||||
while (buf[1] == 0) buf[1] = fgetc(rng);
|
||||
mp_read_raw(a, buf, 1+size);
|
||||
}
|
||||
|
||||
@ -56,14 +52,10 @@ void rand_num2(mp_int *a)
|
||||
int n, size;
|
||||
unsigned char buf[2048];
|
||||
|
||||
top:
|
||||
size = 1 + ((fgetc(rng)*fgetc(rng)) % 96);
|
||||
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 97;
|
||||
buf[0] = (fgetc(rng)&1)?1:0;
|
||||
fread(buf+1, 1, size, rng);
|
||||
for (n = 0; n < size; n++) {
|
||||
if (buf[n+1]) break;
|
||||
}
|
||||
if (n == size) goto top;
|
||||
while (buf[1] == 0) buf[1] = fgetc(rng);
|
||||
mp_read_raw(a, buf, 1+size);
|
||||
}
|
||||
|
||||
@ -73,6 +65,7 @@ int main(void)
|
||||
{
|
||||
int n;
|
||||
mp_int a, b, c, d, e;
|
||||
clock_t t1;
|
||||
char buf[4096];
|
||||
|
||||
mp_init(&a);
|
||||
@ -108,8 +101,14 @@ int main(void)
|
||||
}
|
||||
}
|
||||
|
||||
t1 = clock();
|
||||
for (;;) {
|
||||
n = fgetc(rng) % 13;
|
||||
if (clock() - t1 > CLOCKS_PER_SEC) {
|
||||
sleep(1);
|
||||
t1 = clock();
|
||||
}
|
||||
|
||||
n = fgetc(rng) % 13;
|
||||
|
||||
if (n == 0) {
|
||||
/* add tests */
|
||||
@ -227,6 +226,7 @@ int main(void)
|
||||
rand_num2(&a);
|
||||
rand_num2(&b);
|
||||
rand_num2(&c);
|
||||
// if (c.dp[0]&1) mp_add_d(&c, 1, &c);
|
||||
a.sign = b.sign = c.sign = 0;
|
||||
mp_exptmod(&a, &b, &c, &d);
|
||||
printf("expt\n");
|
||||
|