added libtommath-0.18

This commit is contained in:
Tom St Denis 2003-05-29 13:35:26 +00:00 committed by Steffen Jaeckel
parent fd181cc841
commit 0ef44cea9b
108 changed files with 9777 additions and 2427 deletions

BIN
bn.pdf

Binary file not shown.

2
bn.tex
View File

@ -1,7 +1,7 @@
\documentclass[]{article} \documentclass[]{article}
\begin{document} \begin{document}
\title{LibTomMath v0.17 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org } \title{LibTomMath v0.18 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org }
\author{Tom St Denis \\ tomstdenis@iahu.ca} \author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle \maketitle
\newpage \newpage

View File

@ -14,7 +14,7 @@
*/ */
#include <tommath.h> #include <tommath.h>
/* computes xR^-1 == x (mod N) via Montgomery Reduction /* computes xR**-1 == x (mod N) via Montgomery Reduction
* *
* This is an optimized implementation of mp_montgomery_reduce * This is an optimized implementation of mp_montgomery_reduce
* which uses the comba method to quickly calculate the columns of the * which uses the comba method to quickly calculate the columns of the
@ -23,76 +23,77 @@
* Based on Algorithm 14.32 on pp.601 of HAC. * Based on Algorithm 14.32 on pp.601 of HAC.
*/ */
int int
fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp) fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
{ {
int ix, res, olduse; int ix, res, olduse;
mp_word W[MP_WARRAY]; mp_word W[MP_WARRAY];
/* get old used count */ /* get old used count */
olduse = a->used; olduse = x->used;
/* grow a as required */ /* grow a as required */
if (a->alloc < m->used + 1) { if (x->alloc < n->used + 1) {
if ((res = mp_grow (a, m->used + 1)) != MP_OKAY) { if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
return res; return res;
} }
} }
{ {
register mp_word *_W; register mp_word *_W;
register mp_digit *tmpa; register mp_digit *tmpx;
_W = W; _W = W;
tmpa = a->dp; tmpx = x->dp;
/* copy the digits of a into W[0..a->used-1] */ /* copy the digits of a into W[0..a->used-1] */
for (ix = 0; ix < a->used; ix++) { for (ix = 0; ix < x->used; ix++) {
*_W++ = *tmpa++; *_W++ = *tmpx++;
} }
/* zero the high words of W[a->used..m->used*2] */ /* zero the high words of W[a->used..m->used*2] */
for (; ix < m->used * 2 + 1; ix++) { for (; ix < n->used * 2 + 1; ix++) {
*_W++ = 0; *_W++ = 0;
} }
} }
for (ix = 0; ix < m->used; ix++) { for (ix = 0; ix < n->used; ix++) {
/* ui = ai * m' mod b /* mu = ai * m' mod b
* *
* We avoid a double precision multiplication (which isn't required) * We avoid a double precision multiplication (which isn't required)
* by casting the value down to a mp_digit. Note this requires that W[ix-1] have * by casting the value down to a mp_digit. Note this requires
* the carry cleared (see after the inner loop) * that W[ix-1] have the carry cleared (see after the inner loop)
*/ */
register mp_digit ui; register mp_digit mu;
ui = (((mp_digit) (W[ix] & MP_MASK)) * mp) & MP_MASK; mu = (((mp_digit) (W[ix] & MP_MASK)) * rho) & MP_MASK;
/* a = a + ui * m * b^i /* a = a + mu * m * b**i
* *
* This is computed in place and on the fly. The multiplication * This is computed in place and on the fly. The multiplication
* by b^i is handled by offseting which columns the results * by b**i is handled by offseting which columns the results
* are added to. * are added to.
* *
* Note the comba method normally doesn't handle carries in the inner loop * Note the comba method normally doesn't handle carries in the
* In this case we fix the carry from the previous column since the Montgomery * inner loop In this case we fix the carry from the previous
* reduction requires digits of the result (so far) [see above] to work. This is * column since the Montgomery reduction requires digits of the
* handled by fixing up one carry after the inner loop. The carry fixups are done * result (so far) [see above] to work. This is
* in order so after these loops the first m->used words of W[] have the carries * handled by fixing up one carry after the inner loop. The
* fixed * carry fixups are done in order so after these loops the
* first m->used words of W[] have the carries fixed
*/ */
{ {
register int iy; register int iy;
register mp_digit *tmpx; register mp_digit *tmpn;
register mp_word *_W; register mp_word *_W;
/* alias for the digits of the modulus */ /* alias for the digits of the modulus */
tmpx = m->dp; tmpn = n->dp;
/* Alias for the columns set by an offset of ix */ /* Alias for the columns set by an offset of ix */
_W = W + ix; _W = W + ix;
/* inner loop */ /* inner loop */
for (iy = 0; iy < m->used; iy++) { for (iy = 0; iy < n->used; iy++) {
*_W++ += ((mp_word) ui) * ((mp_word) * tmpx++); *_W++ += ((mp_word) mu) * ((mp_word) * tmpn++);
} }
} }
@ -102,44 +103,44 @@ fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
{ {
register mp_digit *tmpa; register mp_digit *tmpx;
register mp_word *_W, *_W1; register mp_word *_W, *_W1;
/* nox fix rest of carries */ /* nox fix rest of carries */
_W1 = W + ix; _W1 = W + ix;
_W = W + ++ix; _W = W + ++ix;
for (; ix <= m->used * 2 + 1; ix++) { for (; ix <= n->used * 2 + 1; ix++) {
*_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
} }
/* copy out, A = A/b^n /* copy out, A = A/b**n
* *
* The result is A/b^n but instead of converting from an array of mp_word * The result is A/b**n but instead of converting from an
* to mp_digit than calling mp_rshd we just copy them in the right * array of mp_word to mp_digit than calling mp_rshd
* order * we just copy them in the right order
*/ */
tmpa = a->dp; tmpx = x->dp;
_W = W + m->used; _W = W + n->used;
for (ix = 0; ix < m->used + 1; ix++) { for (ix = 0; ix < n->used + 1; ix++) {
*tmpa++ = *_W++ & ((mp_word) MP_MASK); *tmpx++ = *_W++ & ((mp_word) MP_MASK);
} }
/* zero oldused digits, if the input a was larger than /* zero oldused digits, if the input a was larger than
* m->used+1 we'll have to clear the digits */ * m->used+1 we'll have to clear the digits */
for (; ix < olduse; ix++) { for (; ix < olduse; ix++) {
*tmpa++ = 0; *tmpx++ = 0;
} }
} }
/* set the max used and clamp */ /* set the max used and clamp */
a->used = m->used + 1; x->used = n->used + 1;
mp_clamp (a); mp_clamp (x);
/* if A >= m then A = A - m */ /* if A >= m then A = A - m */
if (mp_cmp_mag (a, m) != MP_LT) { if (mp_cmp_mag (x, n) != MP_LT) {
return s_mp_sub (a, m, a); return s_mp_sub (x, n, x);
} }
return MP_OKAY; return MP_OKAY;
} }

View File

@ -16,15 +16,17 @@
/* fast squaring /* fast squaring
* *
* This is the comba method where the columns of the product are computed first * This is the comba method where the columns of the product
* then the carries are computed. This has the effect of making a very simple * are computed first then the carries are computed. This
* inner loop that is executed the most * has the effect of making a very simple inner loop that
* is executed the most
* *
* W2 represents the outer products and W the inner. * W2 represents the outer products and W the inner.
* *
* A further optimizations is made because the inner products are of the form * A further optimizations is made because the inner
* "A * B * 2". The *2 part does not need to be computed until the end which is * products are of the form "A * B * 2". The *2 part does
* good because 64-bit shifts are slow! * not need to be computed until the end which is good
* because 64-bit shifts are slow!
* *
* Based on Algorithm 14.16 on pp.597 of HAC. * Based on Algorithm 14.16 on pp.597 of HAC.
* *
@ -48,26 +50,15 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
* Note that there are two buffers. Since squaring requires * Note that there are two buffers. Since squaring requires
* a outter and inner product and the inner product requires * a outter and inner product and the inner product requires
* computing a product and doubling it (a relatively expensive * computing a product and doubling it (a relatively expensive
* op to perform n^2 times if you don't have to) the inner and * op to perform n**2 times if you don't have to) the inner and
* outer products are computed in different buffers. This way * outer products are computed in different buffers. This way
* the inner product can be doubled using n doublings instead of * the inner product can be doubled using n doublings instead of
* n^2 * n**2
*/ */
memset (W, 0, newused * sizeof (mp_word)); memset (W, 0, newused * sizeof (mp_word));
memset (W2, 0, newused * sizeof (mp_word)); memset (W2, 0, newused * sizeof (mp_word));
/* note optimization /* This computes the inner product. To simplify the inner N**2 loop
* values in W2 are only written in even locations which means
* we can collapse the array to 256 words [and fixup the memset above]
* provided we also fix up the summations below. Ideally
* the fixup loop should be unrolled twice to handle the even/odd
* cases, and then a final step to handle odd cases [e.g. newused == odd]
*
* This will not only save ~8*256 = 2KB of stack but lower the number of
* operations required to finally fix up the columns
*/
/* This computes the inner product. To simplify the inner N^2 loop
* the multiplication by two is done afterwards in the N loop. * the multiplication by two is done afterwards in the N loop.
*/ */
for (ix = 0; ix < pa; ix++) { for (ix = 0; ix < pa; ix++) {
@ -101,18 +92,19 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
} }
/* setup dest */ /* setup dest */
olduse = b->used; olduse = b->used;
b->used = newused; b->used = newused;
/* double first value, since the inner products are half of what they should be */
W[0] += W[0] + W2[0];
/* now compute digits */ /* now compute digits */
{ {
register mp_digit *tmpb; register mp_digit *tmpb;
tmpb = b->dp; /* double first value, since the inner products are
* half of what they should be
*/
W[0] += W[0] + W2[0];
tmpb = b->dp;
for (ix = 1; ix < newused; ix++) { for (ix = 1; ix < newused; ix++) {
/* double/add next digit */ /* double/add next digit */
W[ix] += W[ix] + W2[ix]; W[ix] += W[ix] + W2[ix];
@ -120,9 +112,13 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
W[ix] = W[ix] + (W[ix - 1] >> ((mp_word) DIGIT_BIT)); W[ix] = W[ix] + (W[ix - 1] >> ((mp_word) DIGIT_BIT));
*tmpb++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK)); *tmpb++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
} }
/* set the last value. Note even if the carry is zero
* this is required since the next step will not zero
* it if b originally had a value at b->dp[2*a.used]
*/
*tmpb++ = (mp_digit) (W[(newused) - 1] & ((mp_word) MP_MASK)); *tmpb++ = (mp_digit) (W[(newused) - 1] & ((mp_word) MP_MASK));
/* clear high */ /* clear high digits */
for (; ix < olduse; ix++) { for (; ix < olduse; ix++) {
*tmpb++ = 0; *tmpb++ = 0;
} }

View File

@ -14,7 +14,7 @@
*/ */
#include <tommath.h> #include <tommath.h>
/* computes a = 2^b /* computes a = 2**b
* *
* Simple algorithm which zeroes the int, grows it then just sets one bit * Simple algorithm which zeroes the int, grows it then just sets one bit
* as required. * as required.

View File

@ -21,7 +21,7 @@ mp_copy (mp_int * a, mp_int * b)
int res, n; int res, n;
/* if dst == src do nothing */ /* if dst == src do nothing */
if (a == b || a->dp == b->dp) { if (a == b) {
return MP_OKAY; return MP_OKAY;
} }

View File

@ -21,11 +21,15 @@ mp_count_bits (mp_int * a)
int r; int r;
mp_digit q; mp_digit q;
/* shortcut */
if (a->used == 0) { if (a->used == 0) {
return 0; return 0;
} }
/* get number of digits and add that */
r = (a->used - 1) * DIGIT_BIT; r = (a->used - 1) * DIGIT_BIT;
/* take the last digit and count the bits in it */
q = a->dp[a->used - 1]; q = a->dp[a->used - 1];
while (q > ((mp_digit) 0)) { while (q > ((mp_digit) 0)) {
++r; ++r;

View File

@ -14,7 +14,7 @@
*/ */
#include <tommath.h> #include <tommath.h>
/* shift right by a certain bit count (store quotient in c, remainder in d) */ /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
int int
mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d) mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
{ {
@ -81,7 +81,6 @@ mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
} }
} }
mp_clamp (c); mp_clamp (c);
res = MP_OKAY;
if (d != NULL) { if (d != NULL) {
mp_exch (&t, d); mp_exch (&t, d);
} }

64
bn_mp_div_3.c Normal file
View File

@ -0,0 +1,64 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* divide by three (based on routine from MPI and the GMP manual) */
int
mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
{
mp_int q;
mp_word w, t;
mp_digit b;
int res, ix;
/* b = 2**DIGIT_BIT / 3 */
b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
return res;
}
q.used = a->used;
q.sign = a->sign;
w = 0;
for (ix = a->used - 1; ix >= 0; ix--) {
w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
if (w >= 3) {
t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
w -= (t << ((mp_word)1)) + t;
while (w >= 3) {
t += 1;
w -= 3;
}
} else {
t = 0;
}
q.dp[ix] = t;
}
if (d != NULL) {
*d = w;
}
if (c != NULL) {
mp_clamp(&q);
mp_exch(&q, c);
}
mp_clear(&q);
return res;
}

View File

@ -14,31 +14,51 @@
*/ */
#include <tommath.h> #include <tommath.h>
/* single digit division */ /* single digit division (based on routine from MPI) */
int int
mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d) mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
{ {
mp_int t, t2; mp_int q;
int res; mp_word w, t;
int res, ix;
if ((res = mp_init (&t)) != MP_OKAY) {
return res; if (b == 0) {
return MP_VAL;
} }
if ((res = mp_init (&t2)) != MP_OKAY) { if (b == 3) {
mp_clear (&t); return mp_div_3(a, c, d);
return res;
} }
mp_set (&t, b); if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
res = mp_div (a, &t, c, &t2); return res;
}
/* set remainder if not null */
q.used = a->used;
q.sign = a->sign;
w = 0;
for (ix = a->used - 1; ix >= 0; ix--) {
w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
if (w >= b) {
t = w / b;
w = w % b;
} else {
t = 0;
}
q.dp[ix] = t;
}
if (d != NULL) { if (d != NULL) {
*d = t2.dp[0]; *d = w;
} }
mp_clear (&t); if (c != NULL) {
mp_clear (&t2); mp_clamp(&q);
mp_exch(&q, c);
}
mp_clear(&q);
return res; return res;
} }

View File

@ -14,7 +14,7 @@
*/ */
#include <tommath.h> #include <tommath.h>
/* reduce "a" in place modulo "b" using the Diminished Radix algorithm. /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
* *
* Based on algorithm from the paper * Based on algorithm from the paper
* *
@ -23,107 +23,64 @@
* POSTECH Information Research Laboratories * POSTECH Information Research Laboratories
* *
* The modulus must be of a special format [see manual] * The modulus must be of a special format [see manual]
*
* Has been modified to use algorithm 7.10 from the LTM book instead
*/ */
int int
mp_dr_reduce (mp_int * a, mp_int * b, mp_digit mp) mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
{ {
int err, i, j, k; int err, i, m;
mp_word r; mp_word r;
mp_digit mu, *tmpj, *tmpi; mp_digit mu, *tmpx1, *tmpx2;
/* k = digits in modulus */ /* m = digits in modulus */
k = b->used; m = n->used;
/* ensure that "a" has at least 2k digits */ /* ensure that "x" has at least 2m digits */
if (a->alloc < k + k) { if (x->alloc < m + m) {
if ((err = mp_grow (a, k + k)) != MP_OKAY) { if ((err = mp_grow (x, m + m)) != MP_OKAY) {
return err; return err;
} }
} }
/* alias for a->dp[i] */ /* top of loop, this is where the code resumes if
tmpi = a->dp + k + k - 1; * another reduction pass is required.
*/
/* for (i = 2k - 1; i >= k; i = i - 1) top:
* /* aliases for digits */
* This is the main loop of the reduction. Note that at the end /* alias for lower half of x */
* the words above position k are not zeroed as expected. The end tmpx1 = x->dp;
* result is that the digits from 0 to k-1 are the residue. So
* we have to clear those afterwards. /* alias for upper half of x, or x/B**m */
*/ tmpx2 = x->dp + m;
for (i = k + k - 1; i >= k; i = i - 1) {
/* x[i - 1 : i - k] += x[i]*mp */ /* set carry to zero */
mu = 0;
/* x[i] * mp */
r = ((mp_word) *tmpi--) * ((mp_word) mp); /* compute (x mod B**m) + mp * [x/B**m] inline and inplace */
for (i = 0; i < m; i++) {
/* now add r to x[i-1:i-k] r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
* *tmpx1++ = r & MP_MASK;
* First add it to the first digit x[i-k] then form the carry mu = r >> ((mp_word)DIGIT_BIT);
* then enter the main loop
*/
j = i - k;
/* alias for a->dp[j] */
tmpj = a->dp + j;
/* add digit */
*tmpj += (mp_digit)(r & MP_MASK);
/* this is the carry */
mu = (r >> ((mp_word) DIGIT_BIT)) + (*tmpj >> DIGIT_BIT);
/* clear carry from a->dp[j] */
*tmpj++ &= MP_MASK;
/* now add rest of the digits
*
* Note this is basically a simple single digit addition to
* a larger multiple digit number. This is optimized somewhat
* because the propagation of carries is not likely to move
* more than a few digits.
*
*/
for (++j; mu != 0 && j <= (i - 1); ++j) {
*tmpj += mu;
mu = *tmpj >> DIGIT_BIT;
*tmpj++ &= MP_MASK;
}
/* if final carry */
if (mu != 0) {
/* add mp to this to correct */
j = i - k;
tmpj = a->dp + j;
*tmpj += mp;
mu = *tmpj >> DIGIT_BIT;
*tmpj++ &= MP_MASK;
/* now handle carries */
for (++j; mu != 0 && j <= (i - 1); j++) {
*tmpj += mu;
mu = *tmpj >> DIGIT_BIT;
*tmpj++ &= MP_MASK;
}
}
} }
/* zero words above k */ /* set final carry */
tmpi = a->dp + k; *tmpx1++ = mu;
for (i = k; i < a->used; i++) {
*tmpi++ = 0; /* zero words above m */
for (i = m + 1; i < x->used; i++) {
*tmpx1++ = 0;
} }
/* clamp, sub and return */ /* clamp, sub and return */
mp_clamp (a); mp_clamp (x);
/* if a >= b [b == modulus] then subtract the modulus to fix up */ /* if x >= n then subtract and reduce again
if (mp_cmp_mag (a, b) != MP_LT) { * Each successive "recursion" makes the input smaller and smaller.
return s_mp_sub (a, b, a); */
if (mp_cmp_mag (x, n) != MP_LT) {
s_mp_sub(x, n, x);
goto top;
} }
return MP_OKAY; return MP_OKAY;
} }

View File

@ -20,6 +20,7 @@ void mp_dr_setup(mp_int *a, mp_digit *d)
/* the casts are required if DIGIT_BIT is one less than /* the casts are required if DIGIT_BIT is one less than
* the number of bits in a mp_digit [e.g. DIGIT_BIT==31] * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
*/ */
*d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - ((mp_word)a->dp[0])); *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
((mp_word)a->dp[0]));
} }

View File

@ -14,7 +14,7 @@
*/ */
#include <tommath.h> #include <tommath.h>
/* calculate c = a^b using a square-multiply algorithm */ /* calculate c = a**b using a square-multiply algorithm */
int int
mp_expt_d (mp_int * a, mp_digit b, mp_int * c) mp_expt_d (mp_int * a, mp_digit b, mp_int * c)
{ {

View File

@ -14,7 +14,6 @@
*/ */
#include <tommath.h> #include <tommath.h>
static int f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y);
/* this is a shell function that calls either the normal or Montgomery /* this is a shell function that calls either the normal or Montgomery
* exptmod functions. Originally the call to the montgomery code was * exptmod functions. Originally the call to the montgomery code was
@ -55,212 +54,22 @@ mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
return err; return err;
} }
/* and now compute (1/G)^|X| instead of G^X [X < 0] */ /* and now compute (1/G)**|X| instead of G**X [X < 0] */
err = mp_exptmod(&tmpG, &tmpX, P, Y); err = mp_exptmod(&tmpG, &tmpX, P, Y);
mp_clear_multi(&tmpG, &tmpX, NULL); mp_clear_multi(&tmpG, &tmpX, NULL);
return err; return err;
} }
dr = mp_dr_is_modulus(P); dr = mp_dr_is_modulus(P);
if (dr == 0) {
dr = mp_reduce_is_2k(P) << 1;
}
/* if the modulus is odd use the fast method */ /* if the modulus is odd use the fast method */
if ((mp_isodd (P) == 1 || dr == 1) && P->used > 4) { if ((mp_isodd (P) == 1 || dr != 0) && P->used > 4) {
return mp_exptmod_fast (G, X, P, Y, dr); return mp_exptmod_fast (G, X, P, Y, dr);
} else { } else {
return f_mp_exptmod (G, X, P, Y); return s_mp_exptmod (G, X, P, Y);
} }
} }
static int
f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
{
mp_int M[256], res, mu;
mp_digit buf;
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
/* find window size */
x = mp_count_bits (X);
if (x <= 7) {
winsize = 2;
} else if (x <= 36) {
winsize = 3;
} else if (x <= 140) {
winsize = 4;
} else if (x <= 450) {
winsize = 5;
} else if (x <= 1303) {
winsize = 6;
} else if (x <= 3529) {
winsize = 7;
} else {
winsize = 8;
}
#ifdef MP_LOW_MEM
if (winsize > 5) {
winsize = 5;
}
#endif
/* init G array */
for (x = 0; x < (1 << winsize); x++) {
if ((err = mp_init_size (&M[x], 1)) != MP_OKAY) {
for (y = 0; y < x; y++) {
mp_clear (&M[y]);
}
return err;
}
}
/* create mu, used for Barrett reduction */
if ((err = mp_init (&mu)) != MP_OKAY) {
goto __M;
}
if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
goto __MU;
}
/* create M table
*
* The M table contains powers of the input base, e.g. M[x] = G^x mod P
*
* The first half of the table is not computed though accept for M[0] and M[1]
*/
if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
goto __MU;
}
/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
goto __MU;
}
for (x = 0; x < (winsize - 1); x++) {
if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
goto __MU;
}
if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
goto __MU;
}
}
/* create upper table */
for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
goto __MU;
}
if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
goto __MU;
}
}
/* setup result */
if ((err = mp_init (&res)) != MP_OKAY) {
goto __MU;
}
mp_set (&res, 1);
/* set initial mode and bit cnt */
mode = 0;
bitcnt = 1;
buf = 0;
digidx = X->used - 1;
bitcpy = bitbuf = 0;
for (;;) {
/* grab next digit as required */
if (--bitcnt == 0) {
if (digidx == -1) {
break;
}
buf = X->dp[digidx--];
bitcnt = (int) DIGIT_BIT;
}
/* grab the next msb from the exponent */
y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
buf <<= (mp_digit)1;
/* if the bit is zero and mode == 0 then we ignore it
* These represent the leading zero bits before the first 1 bit
* in the exponent. Technically this opt is not required but it
* does lower the # of trivial squaring/reductions used
*/
if (mode == 0 && y == 0)
continue;
/* if the bit is zero and mode == 1 then we square */
if (mode == 1 && y == 0) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
continue;
}
/* else we add it to the window */
bitbuf |= (y << (winsize - ++bitcpy));
mode = 2;
if (bitcpy == winsize) {
/* ok window is filled so square as required and multiply */
/* square first */
for (x = 0; x < winsize; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
}
/* then multiply */
if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
goto __MU;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __MU;
}
/* empty window and reset */
bitcpy = bitbuf = 0;
mode = 1;
}
}
/* if bits remain then square/multiply */
if (mode == 2 && bitcpy > 0) {
/* square then multiply if the bit is set */
for (x = 0; x < bitcpy; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
bitbuf <<= 1;
if ((bitbuf & (1 << winsize)) != 0) {
/* then multiply */
if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
}
}
}
mp_exch (&res, Y);
err = MP_OKAY;
__RES:mp_clear (&res);
__MU:mp_clear (&mu);
__M:
for (x = 0; x < (1 << winsize); x++) {
mp_clear (&M[x]);
}
return err;
}

View File

@ -27,6 +27,11 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
mp_int M[256], res; mp_int M[256], res;
mp_digit buf, mp; mp_digit buf, mp;
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
/* use a pointer to the reduction algorithm. This allows us to use
* one of many reduction algorithms without modding the guts of
* the code with if statements everywhere.
*/
int (*redux)(mp_int*,mp_int*,mp_digit); int (*redux)(mp_int*,mp_int*,mp_digit);
/* find window size */ /* find window size */
@ -64,6 +69,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
} }
} }
/* determine and setup reduction code */
if (redmode == 0) { if (redmode == 0) {
/* now setup montgomery */ /* now setup montgomery */
if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) { if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
@ -71,17 +77,23 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
} }
/* automatically pick the comba one if available (saves quite a few calls/ifs) */ /* automatically pick the comba one if available (saves quite a few calls/ifs) */
if ( ((P->used * 2 + 1) < MP_WARRAY) && if (((P->used * 2 + 1) < MP_WARRAY) &&
P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
redux = fast_mp_montgomery_reduce; redux = fast_mp_montgomery_reduce;
} else { } else {
/* use slower baselien method */ /* use slower baselien method */
redux = mp_montgomery_reduce; redux = mp_montgomery_reduce;
} }
} else { } else if (redmode == 1) {
/* setup DR reduction */ /* setup DR reduction */
mp_dr_setup(P, &mp); mp_dr_setup(P, &mp);
redux = mp_dr_reduce; redux = mp_dr_reduce;
} else {
/* setup 2k reduction */
if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
goto __M;
}
redux = mp_reduce_2k;
} }
/* setup result */ /* setup result */
@ -142,7 +154,8 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
bitcnt = 1; bitcnt = 1;
buf = 0; buf = 0;
digidx = X->used - 1; digidx = X->used - 1;
bitcpy = bitbuf = 0; bitcpy = 0;
bitbuf = 0;
for (;;) { for (;;) {
/* grab next digit as required */ /* grab next digit as required */
@ -203,7 +216,8 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
} }
/* empty window and reset */ /* empty window and reset */
bitcpy = bitbuf = 0; bitcpy = 0;
bitbuf = 0;
mode = 1; mode = 1;
} }
} }
@ -233,7 +247,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
} }
if (redmode == 0) { if (redmode == 0) {
/* fixup result */ /* fixup result if Montgomery reduction is used */
if ((err = mp_montgomery_reduce (&res, P, mp)) != MP_OKAY) { if ((err = mp_montgomery_reduce (&res, P, mp)) != MP_OKAY) {
goto __RES; goto __RES;
} }

View File

@ -24,7 +24,7 @@ mp_init (mp_int * a)
return MP_MEM; return MP_MEM;
} }
/* set the used to zero, allocated digit to the default precision /* set the used to zero, allocated digits to the default precision
* and sign to positive */ * and sign to positive */
a->used = 0; a->used = 0;
a->alloc = MP_PREC; a->alloc = MP_PREC;

View File

@ -14,24 +14,34 @@
*/ */
#include <tommath.h> #include <tommath.h>
/* c = |a| * |b| using Karatsuba Multiplication using three half size multiplications /* c = |a| * |b| using Karatsuba Multiplication using
* three half size multiplications
* *
* Let B represent the radix [e.g. 2**DIGIT_BIT] and let n represent half of the number of digits in the min(a,b) * Let B represent the radix [e.g. 2**DIGIT_BIT] and
* let n represent half of the number of digits in
* the min(a,b)
* *
* a = a1 * B^n + a0 * a = a1 * B**n + a0
* b = b1 * B^n + b0 * b = b1 * B**n + b0
* *
* Then, a * b => a1b1 * B^2n + ((a1 - b1)(a0 - b0) + a0b0 + a1b1) * B + a0b0 * Then, a * b =>
a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0
* *
* Note that a1b1 and a0b0 are used twice and only need to be computed once. So in total * Note that a1b1 and a0b0 are used twice and only need to be
* three half size (half # of digit) multiplications are performed, a0b0, a1b1 and (a1-b1)(a0-b0) * computed once. So in total three half size (half # of
* digit) multiplications are performed, a0b0, a1b1 and
* (a1-b1)(a0-b0)
* *
* Note that a multiplication of half the digits requires 1/4th the number of single precision * Note that a multiplication of half the digits requires
* multiplications so in total after one call 25% of the single precision multiplications are saved. * 1/4th the number of single precision multiplications so in
* Note also that the call to mp_mul can end up back in this function if the a0, a1, b0, or b1 are above * total after one call 25% of the single precision multiplications
* the threshold. This is known as divide-and-conquer and leads to the famous O(N^lg(3)) or O(N^1.584) work which * are saved. Note also that the call to mp_mul can end up back
* is asymptopically lower than the standard O(N^2) that the baseline/comba methods use. Generally though the * in this function if the a0, a1, b0, or b1 are above the threshold.
* overhead of this method doesn't pay off until a certain size (N ~ 80) is reached. * This is known as divide-and-conquer and leads to the famous
* O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
* the standard O(N**2) that the baseline/comba methods use.
* Generally though the overhead of this method doesn't pay off
* until a certain size (N ~ 80) is reached.
*/ */
int int
mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
@ -101,14 +111,15 @@ mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
} }
} }
/* only need to clamp the lower words since by definition the upper words x1/y1 must /* only need to clamp the lower words since by definition the
* have a known number of digits * upper words x1/y1 must have a known number of digits
*/ */
mp_clamp (&x0); mp_clamp (&x0);
mp_clamp (&y0); mp_clamp (&y0);
/* now calc the products x0y0 and x1y1 */ /* now calc the products x0y0 and x1y1 */
if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) /* after this x0 is no longer required, free temp [x0==t2]! */ /* after this x0 is no longer required, free temp [x0==t2]! */
if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
goto X1Y1; /* x0y0 = x0*y0 */ goto X1Y1; /* x0y0 = x0*y0 */
if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
goto X1Y1; /* x1y1 = x1*y1 */ goto X1Y1; /* x1y1 = x1*y1 */

View File

@ -14,10 +14,12 @@
*/ */
#include <tommath.h> #include <tommath.h>
/* Karatsuba squaring, computes b = a*a using three half size squarings /* Karatsuba squaring, computes b = a*a using three
* half size squarings
* *
* See comments of mp_karatsuba_mul for details. It is essentially the same algorithm * See comments of mp_karatsuba_mul for details. It
* but merely tuned to perform recursive squarings. * is essentially the same algorithm but merely
* tuned to perform recursive squarings.
*/ */
int int
mp_karatsuba_sqr (mp_int * a, mp_int * b) mp_karatsuba_sqr (mp_int * a, mp_int * b)
@ -74,32 +76,32 @@ mp_karatsuba_sqr (mp_int * a, mp_int * b)
/* now calc the products x0*x0 and x1*x1 */ /* now calc the products x0*x0 and x1*x1 */
if (mp_sqr (&x0, &x0x0) != MP_OKAY) if (mp_sqr (&x0, &x0x0) != MP_OKAY)
goto X1X1; /* x0x0 = x0*x0 */ goto X1X1; /* x0x0 = x0*x0 */
if (mp_sqr (&x1, &x1x1) != MP_OKAY) if (mp_sqr (&x1, &x1x1) != MP_OKAY)
goto X1X1; /* x1x1 = x1*x1 */ goto X1X1; /* x1x1 = x1*x1 */
/* now calc (x1-x0)^2 */ /* now calc (x1-x0)**2 */
if (mp_sub (&x1, &x0, &t1) != MP_OKAY) if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
goto X1X1; /* t1 = x1 - x0 */ goto X1X1; /* t1 = x1 - x0 */
if (mp_sqr (&t1, &t1) != MP_OKAY) if (mp_sqr (&t1, &t1) != MP_OKAY)
goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
/* add x0y0 */ /* add x0y0 */
if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY) if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
goto X1X1; /* t2 = x0y0 + x1y1 */ goto X1X1; /* t2 = x0x0 + x1x1 */
if (mp_sub (&t2, &t1, &t1) != MP_OKAY) if (mp_sub (&t2, &t1, &t1) != MP_OKAY)
goto X1X1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */ goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */
/* shift by B */ /* shift by B */
if (mp_lshd (&t1, B) != MP_OKAY) if (mp_lshd (&t1, B) != MP_OKAY)
goto X1X1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
if (mp_lshd (&x1x1, B * 2) != MP_OKAY) if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
goto X1X1; /* x1y1 = x1y1 << 2*B */ goto X1X1; /* x1x1 = x1x1 << 2*B */
if (mp_add (&x0x0, &t1, &t1) != MP_OKAY) if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
goto X1X1; /* t1 = x0y0 + t1 */ goto X1X1; /* t1 = x0x0 + t1 */
if (mp_add (&t1, &x1x1, b) != MP_OKAY) if (mp_add (&t1, &x1x1, b) != MP_OKAY)
goto X1X1; /* t1 = x0y0 + t1 + x1y1 */ goto X1X1; /* t1 = x0x0 + t1 + x1x1 */
err = MP_OKAY; err = MP_OKAY;

View File

@ -33,29 +33,29 @@ mp_lshd (mp_int * a, int b)
} }
{ {
register mp_digit *tmpa, *tmpaa; register mp_digit *top, *bottom;
/* increment the used by the shift amount than copy upwards */ /* increment the used by the shift amount then copy upwards */
a->used += b; a->used += b;
/* top */ /* top */
tmpa = a->dp + a->used - 1; top = a->dp + a->used - 1;
/* base */ /* base */
tmpaa = a->dp + a->used - 1 - b; bottom = a->dp + a->used - 1 - b;
/* much like mp_rshd this is implemented using a sliding window /* much like mp_rshd this is implemented using a sliding window
* except the window goes the otherway around. Copying from * except the window goes the otherway around. Copying from
* the bottom to the top. see bn_mp_rshd.c for more info. * the bottom to the top. see bn_mp_rshd.c for more info.
*/ */
for (x = a->used - 1; x >= b; x--) { for (x = a->used - 1; x >= b; x--) {
*tmpa-- = *tmpaa--; *top-- = *bottom--;
} }
/* zero the lower digits */ /* zero the lower digits */
tmpa = a->dp; top = a->dp;
for (x = 0; x < b; x++) { for (x = 0; x < b; x++) {
*tmpa++ = 0; *top++ = 0;
} }
} }
return MP_OKAY; return MP_OKAY;

View File

@ -17,31 +17,5 @@
int int
mp_mod_d (mp_int * a, mp_digit b, mp_digit * c) mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
{ {
mp_int t, t2; return mp_div_d(a, b, NULL, c);
int res;
if ((res = mp_init (&t)) != MP_OKAY) {
return res;
}
if ((res = mp_init (&t2)) != MP_OKAY) {
mp_clear (&t);
return res;
}
mp_set (&t, b);
mp_div (a, &t, NULL, &t2);
if (t2.sign == MP_NEG) {
if ((res = mp_add_d (&t2, b, &t2)) != MP_OKAY) {
mp_clear (&t);
mp_clear (&t2);
return res;
}
}
*c = t2.dp[0];
mp_clear (&t);
mp_clear (&t2);
return MP_OKAY;
} }

View File

@ -14,12 +14,12 @@
*/ */
#include <tommath.h> #include <tommath.h>
/* computes xR^-1 == x (mod N) via Montgomery Reduction */ /* computes xR**-1 == x (mod N) via Montgomery Reduction */
int int
mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp) mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
{ {
int ix, res, digs; int ix, res, digs;
mp_digit ui; mp_digit mu;
/* can the fast reduction [comba] method be used? /* can the fast reduction [comba] method be used?
* *
@ -27,55 +27,60 @@ mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
* than the available columns [255 per default] since carries * than the available columns [255 per default] since carries
* are fixed up in the inner loop. * are fixed up in the inner loop.
*/ */
digs = m->used * 2 + 1; digs = n->used * 2 + 1;
if ((digs < MP_WARRAY) if ((digs < MP_WARRAY) &&
&& m->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { n->used <
return fast_mp_montgomery_reduce (a, m, mp); (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
return fast_mp_montgomery_reduce (x, n, rho);
} }
/* grow the input as required */ /* grow the input as required */
if (a->alloc < m->used * 2 + 1) { if (x->alloc < digs) {
if ((res = mp_grow (a, m->used * 2 + 1)) != MP_OKAY) { if ((res = mp_grow (x, digs)) != MP_OKAY) {
return res; return res;
} }
} }
a->used = m->used * 2 + 1; x->used = digs;
for (ix = 0; ix < m->used; ix++) { for (ix = 0; ix < n->used; ix++) {
/* ui = ai * m' mod b */ /* mu = ai * m' mod b */
ui = (a->dp[ix] * mp) & MP_MASK; mu = (x->dp[ix] * rho) & MP_MASK;
/* a = a + ui * m * b^i */ /* a = a + mu * m * b**i */
{ {
register int iy; register int iy;
register mp_digit *tmpx, *tmpy, mu; register mp_digit *tmpn, *tmpx, u;
register mp_word r; register mp_word r;
/* aliases */ /* aliases */
tmpx = m->dp; tmpn = n->dp;
tmpy = a->dp + ix; tmpx = x->dp + ix;
mu = 0; /* set the carry to zero */
for (iy = 0; iy < m->used; iy++) { u = 0;
r = ((mp_word) ui) * ((mp_word) * tmpx++) + ((mp_word) mu) + ((mp_word) * tmpy);
mu = (r >> ((mp_word) DIGIT_BIT)); /* Multiply and add in place */
*tmpy++ = (r & ((mp_word) MP_MASK)); for (iy = 0; iy < n->used; iy++) {
r = ((mp_word) mu) * ((mp_word) * tmpn++) +
((mp_word) u) + ((mp_word) * tmpx);
u = (r >> ((mp_word) DIGIT_BIT));
*tmpx++ = (r & ((mp_word) MP_MASK));
} }
/* propagate carries */ /* propagate carries */
while (mu) { while (u) {
*tmpy += mu; *tmpx += u;
mu = (*tmpy >> DIGIT_BIT) & 1; u = *tmpx >> DIGIT_BIT;
*tmpy++ &= MP_MASK; *tmpx++ &= MP_MASK;
} }
} }
} }
/* A = A/b^n */ /* x = x/b**n.used */
mp_rshd (a, m->used); mp_rshd (x, n->used);
/* if A >= m then A = A - m */ /* if A >= m then A = A - m */
if (mp_cmp_mag (a, m) != MP_LT) { if (mp_cmp_mag (x, n) != MP_LT) {
return s_mp_sub (a, m, a); return s_mp_sub (x, n, x);
} }
return MP_OKAY; return MP_OKAY;

View File

@ -16,38 +16,38 @@
/* setups the montgomery reduction stuff */ /* setups the montgomery reduction stuff */
int int
mp_montgomery_setup (mp_int * a, mp_digit * mp) mp_montgomery_setup (mp_int * n, mp_digit * rho)
{ {
mp_digit x, b; mp_digit x, b;
/* fast inversion mod 2^k /* fast inversion mod 2**k
* *
* Based on the fact that * Based on the fact that
* *
* XA = 1 (mod 2^n) => (X(2-XA)) A = 1 (mod 2^2n) * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
* => 2*X*A - X*X*A*A = 1 * => 2*X*A - X*X*A*A = 1
* => 2*(1) - (1) = 1 * => 2*(1) - (1) = 1
*/ */
b = a->dp[0]; b = n->dp[0];
if ((b & 1) == 0) { if ((b & 1) == 0) {
return MP_VAL; return MP_VAL;
} }
x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2^4 */ x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
x *= 2 - b * x; /* here x*a==1 mod 2^8 */ x *= 2 - b * x; /* here x*a==1 mod 2**8 */
#if !defined(MP_8BIT) #if !defined(MP_8BIT)
x *= 2 - b * x; /* here x*a==1 mod 2^16; each step doubles the nb of bits */ x *= 2 - b * x; /* here x*a==1 mod 2**16 */
#endif #endif
#if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT)) #if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
x *= 2 - b * x; /* here x*a==1 mod 2^32 */ x *= 2 - b * x; /* here x*a==1 mod 2**32 */
#endif #endif
#ifdef MP_64BIT #ifdef MP_64BIT
x *= 2 - b * x; /* here x*a==1 mod 2^64 */ x *= 2 - b * x; /* here x*a==1 mod 2**64 */
#endif #endif
/* t = -1/m mod b */ /* rho = -1/m mod b */
*mp = (((mp_digit) 1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK; *rho = (((mp_digit) 1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK;
return MP_OKAY; return MP_OKAY;
} }

View File

@ -20,19 +20,24 @@ mp_mul (mp_int * a, mp_int * b, mp_int * c)
{ {
int res, neg; int res, neg;
neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
if (MIN (a->used, b->used) > KARATSUBA_MUL_CUTOFF) {
if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
res = mp_toom_mul(a, b, c);
} else if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
res = mp_karatsuba_mul (a, b, c); res = mp_karatsuba_mul (a, b, c);
} else { } else {
/* can we use the fast multiplier? /* can we use the fast multiplier?
* *
* The fast multiplier can be used if the output will have less than * The fast multiplier can be used if the output will
* MP_WARRAY digits and the number of digits won't affect carry propagation * have less than MP_WARRAY digits and the number of
* digits won't affect carry propagation
*/ */
int digs = a->used + b->used + 1; int digs = a->used + b->used + 1;
if ((digs < MP_WARRAY) if ((digs < MP_WARRAY) &&
&& MIN(a->used, b->used) <= (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { MIN(a->used, b->used) <=
(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
res = fast_s_mp_mul_digs (a, b, c, digs); res = fast_s_mp_mul_digs (a, b, c, digs);
} else { } else {
res = s_mp_mul (a, b, c); res = s_mp_mul (a, b, c);

View File

@ -14,22 +14,8 @@
*/ */
#include <tommath.h> #include <tommath.h>
/* pre-calculate the value required for Barrett reduction /* reduces x mod m, assumes 0 < x < m**2, mu is
* For a given modulus "b" it calulates the value required in "a" * precomputed via mp_reduce_setup.
*/
int
mp_reduce_setup (mp_int * a, mp_int * b)
{
int res;
if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
return res;
}
res = mp_div (a, b, a, NULL);
return res;
}
/* reduces x mod m, assumes 0 < x < m^2, mu is precomputed via mp_reduce_setup
* From HAC pp.604 Algorithm 14.42 * From HAC pp.604 Algorithm 14.42
*/ */
int int
@ -38,11 +24,12 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
mp_int q; mp_int q;
int res, um = m->used; int res, um = m->used;
/* q = x */
if ((res = mp_init_copy (&q, x)) != MP_OKAY) { if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
return res; return res;
} }
/* q1 = x / b^(k-1) */ /* q1 = x / b**(k-1) */
mp_rshd (&q, um - 1); mp_rshd (&q, um - 1);
/* according to HAC this is optimization is ok */ /* according to HAC this is optimization is ok */
@ -56,15 +43,15 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
} }
} }
/* q3 = q2 / b^(k+1) */ /* q3 = q2 / b**(k+1) */
mp_rshd (&q, um + 1); mp_rshd (&q, um + 1);
/* x = x mod b^(k+1), quick (no division) */ /* x = x mod b**(k+1), quick (no division) */
if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
goto CLEANUP; goto CLEANUP;
} }
/* q = q * m mod b^(k+1), quick (no division) */ /* q = q * m mod b**(k+1), quick (no division) */
if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) { if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
goto CLEANUP; goto CLEANUP;
} }
@ -74,7 +61,7 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
goto CLEANUP; goto CLEANUP;
} }
/* If x < 0, add b^(k+1) to it */ /* If x < 0, add b**(k+1) to it */
if (mp_cmp_d (x, 0) == MP_LT) { if (mp_cmp_d (x, 0) == MP_LT) {
mp_set (&q, 1); mp_set (&q, 1);
if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
@ -89,7 +76,7 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
break; break;
} }
} }
CLEANUP: CLEANUP:
mp_clear (&q); mp_clear (&q);

56
bn_mp_reduce_2k.c Normal file
View File

@ -0,0 +1,56 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* reduces a modulo n where n is of the form 2**p - k */
int
mp_reduce_2k(mp_int *a, mp_int *n, mp_digit k)
{
mp_int q;
int p, res;
if ((res = mp_init(&q)) != MP_OKAY) {
return res;
}
p = mp_count_bits(n);
top:
/* q = a/2**p, a = a mod 2**p */
if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
goto ERR;
}
if (k != 1) {
/* q = q * k */
if ((res = mp_mul_d(&q, k, &q)) != MP_OKAY) {
goto ERR;
}
}
/* a = a + q */
if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
goto ERR;
}
if (mp_cmp_mag(a, n) != MP_LT) {
s_mp_sub(a, n, a);
goto top;
}
ERR:
mp_clear(&q);
return res;
}

42
bn_mp_reduce_2k_setup.c Normal file
View File

@ -0,0 +1,42 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* determines the setup value */
int
mp_reduce_2k_setup(mp_int *a, mp_digit *d)
{
int res, p;
mp_int tmp;
if ((res = mp_init(&tmp)) != MP_OKAY) {
return res;
}
p = mp_count_bits(a);
if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
mp_clear(&tmp);
return res;
}
if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
mp_clear(&tmp);
return res;
}
*d = tmp.dp[0];
mp_clear(&tmp);
return MP_OKAY;
}

37
bn_mp_reduce_is_2k.c Normal file
View File

@ -0,0 +1,37 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* determines if mp_reduce_2k can be used */
int
mp_reduce_is_2k(mp_int *a)
{
int ix, iy;
if (a->used == 0) {
return 0;
} else if (a->used == 1) {
return 1;
} else if (a->used > 1) {
iy = mp_count_bits(a);
for (ix = DIGIT_BIT; ix < iy; ix++) {
if ((a->dp[ix/DIGIT_BIT] & ((mp_digit)1 << (mp_digit)(ix % DIGIT_BIT))) == 0) {
return 0;
}
}
}
return 1;
}

29
bn_mp_reduce_setup.c Normal file
View File

@ -0,0 +1,29 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* pre-calculate the value required for Barrett reduction
* For a given modulus "b" it calulates the value required in "a"
*/
int
mp_reduce_setup (mp_int * a, mp_int * b)
{
int res;
if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
return res;
}
return mp_div (a, b, a, NULL);
}

View File

@ -32,15 +32,15 @@ mp_rshd (mp_int * a, int b)
} }
{ {
register mp_digit *tmpa, *tmpaa; register mp_digit *bottom, *top;
/* shift the digits down */ /* shift the digits down */
/* base */ /* bottom */
tmpa = a->dp; bottom = a->dp;
/* offset into digits */ /* top [offset into digits] */
tmpaa = a->dp + b; top = a->dp + b;
/* this is implemented as a sliding window where /* this is implemented as a sliding window where
* the window is b-digits long and digits from * the window is b-digits long and digits from
@ -53,13 +53,15 @@ mp_rshd (mp_int * a, int b)
\-------------------/ ----> \-------------------/ ---->
*/ */
for (x = 0; x < (a->used - b); x++) { for (x = 0; x < (a->used - b); x++) {
*tmpa++ = *tmpaa++; *bottom++ = *top++;
} }
/* zero the top digits */ /* zero the top digits */
for (; x < a->used; x++) { for (; x < a->used; x++) {
*tmpa++ = 0; *bottom++ = 0;
} }
} }
mp_clamp (a);
/* remove excess digits */
a->used -= b;
} }

View File

@ -35,7 +35,7 @@ mp_set_int (mp_int * a, unsigned int b)
b <<= 4; b <<= 4;
/* ensure that digits are not clamped off */ /* ensure that digits are not clamped off */
a->used += 32 / DIGIT_BIT + 2; a->used += 1;
} }
mp_clamp (a); mp_clamp (a);
return MP_OKAY; return MP_OKAY;

View File

@ -19,12 +19,16 @@ int
mp_sqr (mp_int * a, mp_int * b) mp_sqr (mp_int * a, mp_int * b)
{ {
int res; int res;
if (a->used > KARATSUBA_SQR_CUTOFF) { if (a->used >= TOOM_SQR_CUTOFF) {
res = mp_toom_sqr(a, b);
} else if (a->used >= KARATSUBA_SQR_CUTOFF) {
res = mp_karatsuba_sqr (a, b); res = mp_karatsuba_sqr (a, b);
} else { } else {
/* can we use the fast multiplier? */ /* can we use the fast multiplier? */
if ((a->used * 2 + 1) < 512 && a->used < (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) { if ((a->used * 2 + 1) < MP_WARRAY &&
a->used <
(1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
res = fast_s_mp_sqr (a, b); res = fast_s_mp_sqr (a, b);
} else { } else {
res = s_mp_sqr (a, b); res = s_mp_sqr (a, b);

268
bn_mp_toom_mul.c Normal file
View File

@ -0,0 +1,268 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* multiplication using Toom-Cook 3-way algorithm */
int
mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
{
mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
int res, B;
/* init temps */
if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &b0, &b1, &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
return res;
}
/* B */
B = MIN(a->used, b->used) / 3;
/* a = a2 * B^2 + a1 * B + a0 */
if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_copy(a, &a1)) != MP_OKAY) {
goto ERR;
}
mp_rshd(&a1, B);
mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
if ((res = mp_copy(a, &a2)) != MP_OKAY) {
goto ERR;
}
mp_rshd(&a2, B*2);
/* b = b2 * B^2 + b1 * B + b0 */
if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_copy(b, &b1)) != MP_OKAY) {
goto ERR;
}
mp_rshd(&b1, B);
mp_mod_2d(&b1, DIGIT_BIT * B, &b1);
if ((res = mp_copy(b, &b2)) != MP_OKAY) {
goto ERR;
}
mp_rshd(&b2, B*2);
/* w0 = a0*b0 */
if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) {
goto ERR;
}
/* w4 = a2 * b2 */
if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) {
goto ERR;
}
/* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) {
goto ERR;
}
/* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) {
goto ERR;
}
/* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) {
goto ERR;
}
/* now solve the matrix
0 0 0 0 1
1 2 4 8 16
1 1 1 1 1
16 8 4 2 1
1 0 0 0 0
using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication
*/
/* r1 - r4 */
if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - r0 */
if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
goto ERR;
}
/* r1/2 */
if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3/2 */
if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
goto ERR;
}
/* r2 - r0 - r4 */
if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
goto ERR;
}
/* r1 - r2 */
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - r2 */
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
goto ERR;
}
/* r1 - 8r0 */
if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - 8r4 */
if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
goto ERR;
}
/* 3r2 - r1 - r3 */
if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
goto ERR;
}
/* r1 - r2 */
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - r2 */
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
goto ERR;
}
/* r1/3 */
if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
goto ERR;
}
/* r3/3 */
if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
goto ERR;
}
/* at this point shift W[n] by B*n */
if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) {
goto ERR;
}
ERR:
mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &b0, &b1, &b2, &tmp1, &tmp2, NULL);
return res;
}

220
bn_mp_toom_sqr.c Normal file
View File

@ -0,0 +1,220 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* squaring using Toom-Cook 3-way algorithm */
int
mp_toom_sqr(mp_int *a, mp_int *b)
{
mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2;
int res, B;
/* init temps */
if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) {
return res;
}
/* B */
B = a->used / 3;
/* a = a2 * B^2 + a1 * B + a0 */
if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_copy(a, &a1)) != MP_OKAY) {
goto ERR;
}
mp_rshd(&a1, B);
mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
if ((res = mp_copy(a, &a2)) != MP_OKAY) {
goto ERR;
}
mp_rshd(&a2, B*2);
/* w0 = a0*a0 */
if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) {
goto ERR;
}
/* w4 = a2 * a2 */
if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) {
goto ERR;
}
/* w1 = (a2 + 2(a1 + 2a0))**2 */
if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) {
goto ERR;
}
/* w3 = (a0 + 2(a1 + 2a2))**2 */
if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) {
goto ERR;
}
/* w2 = (a2 + a1 + a0)**2 */
if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) {
goto ERR;
}
/* now solve the matrix
0 0 0 0 1
1 2 4 8 16
1 1 1 1 1
16 8 4 2 1
1 0 0 0 0
using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication.
*/
/* r1 - r4 */
if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - r0 */
if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
goto ERR;
}
/* r1/2 */
if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3/2 */
if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
goto ERR;
}
/* r2 - r0 - r4 */
if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
goto ERR;
}
/* r1 - r2 */
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - r2 */
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
goto ERR;
}
/* r1 - 8r0 */
if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - 8r4 */
if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
goto ERR;
}
/* 3r2 - r1 - r3 */
if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
goto ERR;
}
/* r1 - r2 */
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - r2 */
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
goto ERR;
}
/* r1/3 */
if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
goto ERR;
}
/* r3/3 */
if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
goto ERR;
}
/* at this point shift W[n] by B*n */
if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) {
goto ERR;
}
ERR:
mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL);
return res;
}

View File

@ -45,7 +45,6 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
olduse = c->used; olduse = c->used;
c->used = max + 1; c->used = max + 1;
/* set the carry to zero */
{ {
register mp_digit u, *tmpa, *tmpb, *tmpc; register mp_digit u, *tmpa, *tmpb, *tmpc;
register int i; register int i;

211
bn_s_mp_exptmod.c Normal file
View File

@ -0,0 +1,211 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
int
s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
{
mp_int M[256], res, mu;
mp_digit buf;
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
/* find window size */
x = mp_count_bits (X);
if (x <= 7) {
winsize = 2;
} else if (x <= 36) {
winsize = 3;
} else if (x <= 140) {
winsize = 4;
} else if (x <= 450) {
winsize = 5;
} else if (x <= 1303) {
winsize = 6;
} else if (x <= 3529) {
winsize = 7;
} else {
winsize = 8;
}
#ifdef MP_LOW_MEM
if (winsize > 5) {
winsize = 5;
}
#endif
/* init M array */
for (x = 0; x < (1 << winsize); x++) {
if ((err = mp_init_size (&M[x], 1)) != MP_OKAY) {
for (y = 0; y < x; y++) {
mp_clear (&M[y]);
}
return err;
}
}
/* create mu, used for Barrett reduction */
if ((err = mp_init (&mu)) != MP_OKAY) {
goto __M;
}
if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
goto __MU;
}
/* create M table
*
* The M table contains powers of the input base, e.g. M[x] = G**x mod P
*
* The first half of the table is not computed though accept for M[0] and M[1]
*/
if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
goto __MU;
}
/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
goto __MU;
}
for (x = 0; x < (winsize - 1); x++) {
if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
goto __MU;
}
if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
goto __MU;
}
}
/* create upper table */
for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
goto __MU;
}
if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
goto __MU;
}
}
/* setup result */
if ((err = mp_init (&res)) != MP_OKAY) {
goto __MU;
}
mp_set (&res, 1);
/* set initial mode and bit cnt */
mode = 0;
bitcnt = 1;
buf = 0;
digidx = X->used - 1;
bitcpy = 0;
bitbuf = 0;
for (;;) {
/* grab next digit as required */
if (--bitcnt == 0) {
if (digidx == -1) {
break;
}
buf = X->dp[digidx--];
bitcnt = (int) DIGIT_BIT;
}
/* grab the next msb from the exponent */
y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
buf <<= (mp_digit)1;
/* if the bit is zero and mode == 0 then we ignore it
* These represent the leading zero bits before the first 1 bit
* in the exponent. Technically this opt is not required but it
* does lower the # of trivial squaring/reductions used
*/
if (mode == 0 && y == 0)
continue;
/* if the bit is zero and mode == 1 then we square */
if (mode == 1 && y == 0) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
continue;
}
/* else we add it to the window */
bitbuf |= (y << (winsize - ++bitcpy));
mode = 2;
if (bitcpy == winsize) {
/* ok window is filled so square as required and multiply */
/* square first */
for (x = 0; x < winsize; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
}
/* then multiply */
if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
goto __MU;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __MU;
}
/* empty window and reset */
bitcpy = 0;
bitbuf = 0;
mode = 1;
}
}
/* if bits remain then square/multiply */
if (mode == 2 && bitcpy > 0) {
/* square then multiply if the bit is set */
for (x = 0; x < bitcpy; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
bitbuf <<= 1;
if ((bitbuf & (1 << winsize)) != 0) {
/* then multiply */
if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
}
}
}
mp_exch (&res, Y);
err = MP_OKAY;
__RES:mp_clear (&res);
__MU:mp_clear (&mu);
__M:
for (x = 0; x < (1 << winsize); x++) {
mp_clear (&M[x]);
}
return err;
}

View File

@ -20,8 +20,8 @@ s_mp_sqr (mp_int * a, mp_int * b)
{ {
mp_int t; mp_int t;
int res, ix, iy, pa; int res, ix, iy, pa;
mp_word r, u; mp_word r;
mp_digit tmpx, *tmpt; mp_digit u, tmpx, *tmpt;
pa = a->used; pa = a->used;
if ((res = mp_init_size (&t, pa + pa + 1)) != MP_OKAY) { if ((res = mp_init_size (&t, pa + pa + 1)) != MP_OKAY) {
@ -32,7 +32,8 @@ s_mp_sqr (mp_int * a, mp_int * b)
for (ix = 0; ix < pa; ix++) { for (ix = 0; ix < pa; ix++) {
/* first calculate the digit at 2*ix */ /* first calculate the digit at 2*ix */
/* calculate double precision result */ /* calculate double precision result */
r = ((mp_word) t.dp[ix + ix]) + ((mp_word) a->dp[ix]) * ((mp_word) a->dp[ix]); r = ((mp_word) t.dp[ix + ix]) +
((mp_word) a->dp[ix]) * ((mp_word) a->dp[ix]);
/* store lower part in result */ /* store lower part in result */
t.dp[ix + ix] = (mp_digit) (r & ((mp_word) MP_MASK)); t.dp[ix + ix] = (mp_digit) (r & ((mp_word) MP_MASK));
@ -44,7 +45,8 @@ s_mp_sqr (mp_int * a, mp_int * b)
tmpx = a->dp[ix]; tmpx = a->dp[ix];
/* alias for where to store the results */ /* alias for where to store the results */
tmpt = &(t.dp[ix + ix + 1]); tmpt = t.dp + (ix + ix + 1);
for (iy = ix + 1; iy < pa; iy++) { for (iy = ix + 1; iy < pa; iy++) {
/* first calculate the product */ /* first calculate the product */
r = ((mp_word) tmpx) * ((mp_word) a->dp[iy]); r = ((mp_word) tmpx) * ((mp_word) a->dp[iy]);
@ -60,13 +62,9 @@ s_mp_sqr (mp_int * a, mp_int * b)
/* get carry */ /* get carry */
u = (r >> ((mp_word) DIGIT_BIT)); u = (r >> ((mp_word) DIGIT_BIT));
} }
r = ((mp_word) * tmpt) + u;
*tmpt = (mp_digit) (r & ((mp_word) MP_MASK));
u = (r >> ((mp_word) DIGIT_BIT));
/* propagate upwards */ /* propagate upwards */
++tmpt; while (u != ((mp_digit) 0)) {
while (u != ((mp_word) 0)) { r = ((mp_word) * tmpt) + ((mp_word) u);
r = ((mp_word) * tmpt) + ((mp_word) 1);
*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
u = (r >> ((mp_word) DIGIT_BIT)); u = (r >> ((mp_word) DIGIT_BIT));
} }

View File

@ -33,7 +33,6 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
olduse = c->used; olduse = c->used;
c->used = max; c->used = max;
/* sub digits from lower part */
{ {
register mp_digit u, *tmpa, *tmpb, *tmpc; register mp_digit u, *tmpa, *tmpb, *tmpc;
register int i; register int i;
@ -52,7 +51,7 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
/* U = carry bit of T[i] /* U = carry bit of T[i]
* Note this saves performing an AND operation since * Note this saves performing an AND operation since
* if a carry does occur it will propagate all the way to the * if a carry does occur it will propagate all the way to the
* MSB. As a result a single shift is required to get the carry * MSB. As a result a single shift is enough to get the carry
*/ */
u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
@ -81,3 +80,4 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
mp_clamp (c); mp_clamp (c);
return MP_OKAY; return MP_OKAY;
} }

View File

@ -18,11 +18,14 @@
CPU /Compiler /MUL CUTOFF/SQR CUTOFF CPU /Compiler /MUL CUTOFF/SQR CUTOFF
------------------------------------------------------------- -------------------------------------------------------------
Intel P4 /GCC v3.2 / 81/ 110 Intel P4 /GCC v3.2 / 70/ 108
AMD Athlon XP /GCC v3.2 / 109/ 127 AMD Athlon XP /GCC v3.2 / 109/ 127
*/ */
/* configured for a AMD XP Thoroughbred core with etc/tune.c */ /* configured for a AMD XP Thoroughbred core with etc/tune.c */
int KARATSUBA_MUL_CUTOFF = 109, /* Min. number of digits before Karatsuba multiplication is used. */ int KARATSUBA_MUL_CUTOFF = 109, /* Min. number of digits before Karatsuba multiplication is used. */
KARATSUBA_SQR_CUTOFF = 127; /* Min. number of digits before Karatsuba squaring is used. */ KARATSUBA_SQR_CUTOFF = 127, /* Min. number of digits before Karatsuba squaring is used. */
TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */
TOOM_SQR_CUTOFF = 400;

View File

@ -1,3 +1,15 @@
May 29th, 2003
v0.18 -- Fixed a bug in s_mp_sqr which would handle carries properly just not very elegantly.
(e.g. correct result, just bad looking code)
-- Fixed bug in mp_sqr which still had a 512 constant instead of MP_WARRAY
-- Added Toom-Cook multipliers [needs tuning!]
-- Added efficient divide by 3 algorithm mp_div_3
-- Re-wrote mp_div_d to be faster than calling mp_div
-- Added in a donated BCC makefile and a single page LTM poster (ahalhabsi@sbcglobal.net)
-- Added mp_reduce_2k which reduces an input modulo n = 2**p - k for any single digit k
-- Made the exptmod system be aware of the 2k reduction algorithms.
-- Rewrote mp_dr_reduce to be smaller, simpler and easier to understand.
May 17th, 2003 May 17th, 2003
v0.17 -- Benjamin Goldberg submitted optimized mp_add and mp_sub routines. A new gen.pl as well v0.17 -- Benjamin Goldberg submitted optimized mp_add and mp_sub routines. A new gen.pl as well
as several smaller suggestions. Thanks! as several smaller suggestions. Thanks!

View File

@ -53,7 +53,7 @@ int main(void)
#ifdef TIMER #ifdef TIMER
int n; int n;
ulong64 tt; ulong64 tt;
FILE *log, *logb; FILE *log, *logb, *logc;
#endif #endif
mp_init(&a); mp_init(&a);
@ -62,11 +62,54 @@ int main(void)
mp_init(&d); mp_init(&d);
mp_init(&e); mp_init(&e);
mp_init(&f); mp_init(&f);
srand(time(NULL));
/* test mp_reduce_2k */
#if 0
for (cnt = 3; cnt <= 4096; ++cnt) {
mp_digit tmp;
mp_2expt(&a, cnt);
mp_sub_d(&a, 1, &a); /* a = 2**cnt - 1 */
printf("\nTesting %4d bits", cnt);
printf("(%d)", mp_reduce_is_2k(&a));
mp_reduce_2k_setup(&a, &tmp);
printf("(%d)", tmp);
for (ix = 0; ix < 100000; ix++) {
if (!(ix & 1023)) {printf("."); fflush(stdout); }
mp_rand(&b, (cnt/DIGIT_BIT + 1) * 2);
mp_copy(&c, &b);
mp_mod(&c, &a, &c);
mp_reduce_2k(&b, &a, 1);
if (mp_cmp(&c, &b)) {
printf("FAILED\n");
exit(0);
}
}
}
#endif
/* test mp_div_3 */
#if 0
for (cnt = 0; cnt < 1000000; ) {
mp_digit r1, r2;
if (!(++cnt & 127)) printf("%9d\r", cnt);
mp_rand(&a, abs(rand()) % 32 + 1);
mp_div_d(&a, 3, &b, &r1);
mp_div_3(&a, &c, &r2);
if (mp_cmp(&b, &c) || r1 != r2) {
printf("Failure\n");
}
}
#endif
/* test the DR reduction */ /* test the DR reduction */
#if 0 #if 0
srand(time(NULL));
for (cnt = 2; cnt < 32; cnt++) { for (cnt = 2; cnt < 32; cnt++) {
printf("%d digit modulus\n", cnt); printf("%d digit modulus\n", cnt);
mp_grow(&a, cnt); mp_grow(&a, cnt);
@ -91,6 +134,7 @@ int main(void)
if (mp_cmp(&b, &c) != MP_EQ) { if (mp_cmp(&b, &c) != MP_EQ) {
printf("Failed on trial %lu\n", rr); exit(-1); printf("Failed on trial %lu\n", rr); exit(-1);
} }
} while (++rr < 1000000); } while (++rr < 1000000);
printf("Passed DR test for %d digits\n", cnt); printf("Passed DR test for %d digits\n", cnt);
@ -98,6 +142,9 @@ int main(void)
#endif #endif
#ifdef TIMER #ifdef TIMER
/* temp. turn off TOOM */
TOOM_MUL_CUTOFF = TOOM_SQR_CUTOFF = 100000;
printf("CLOCKS_PER_SEC == %lu\n", CLOCKS_PER_SEC); printf("CLOCKS_PER_SEC == %lu\n", CLOCKS_PER_SEC);
log = fopen("logs/add.log", "w"); log = fopen("logs/add.log", "w");
@ -172,9 +219,16 @@ int main(void)
} }
fclose(log); fclose(log);
} }
{
{
char *primes[] = { char *primes[] = {
/* 2K moduli mersenne primes */
"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151",
"531137992816767098689588206552468627329593117727031923199444138200403559860852242739162502265229285668889329486246501015346579337652707239409519978766587351943831270835393219031728127",
"10407932194664399081925240327364085538615262247266704805319112350403608059673360298012239441732324184842421613954281007791383566248323464908139906605677320762924129509389220345773183349661583550472959420547689811211693677147548478866962501384438260291732348885311160828538416585028255604666224831890918801847068222203140521026698435488732958028878050869736186900714720710555703168729087",
"1475979915214180235084898622737381736312066145333169775147771216478570297878078949377407337049389289382748507531496480477281264838760259191814463365330269540496961201113430156902396093989090226259326935025281409614983499388222831448598601834318536230923772641390209490231836446899608210795482963763094236630945410832793769905399982457186322944729636418890623372171723742105636440368218459649632948538696905872650486914434637457507280441823676813517852099348660847172579408422316678097670224011990280170474894487426924742108823536808485072502240519452587542875349976558572670229633962575212637477897785501552646522609988869914013540483809865681250419497686697771007",
"259117086013202627776246767922441530941818887553125427303974923161874019266586362086201209516800483406550695241733194177441689509238807017410377709597512042313066624082916353517952311186154862265604547691127595848775610568757931191017711408826252153849035830401185072116424747461823031471398340229288074545677907941037288235820705892351068433882986888616658650280927692080339605869308790500409503709875902119018371991620994002568935113136548829739112656797303241986517250116412703509705427773477972349821676443446668383119322540099648994051790241624056519054483690809616061625743042361721863339415852426431208737266591962061753535748892894599629195183082621860853400937932839420261866586142503251450773096274235376822938649407127700846077124211823080804139298087057504713825264571448379371125032081826126566649084251699453951887789613650248405739378594599444335231188280123660406262468609212150349937584782292237144339628858485938215738821232393687046160677362909315071",
"190797007524439073807468042969529173669356994749940177394741882673528979787005053706368049835514900244303495954950709725762186311224148828811920216904542206960744666169364221195289538436845390250168663932838805192055137154390912666527533007309292687539092257043362517857366624699975402375462954490293259233303137330643531556539739921926201438606439020075174723029056838272505051571967594608350063404495977660656269020823960825567012344189908927956646011998057988548630107637380993519826582389781888135705408653045219655801758081251164080554609057468028203308718724654081055323215860189611391296030471108443146745671967766308925858547271507311563765171008318248647110097614890313562856541784154881743146033909602737947385055355960331855614540900081456378659068370317267696980001187750995491090350108417050917991562167972281070161305972518044872048331306383715094854938415738549894606070722584737978176686422134354526989443028353644037187375385397838259511833166416134323695660367676897722287918773420968982326089026150031515424165462111337527431154890666327374921446276833564519776797633875503548665093914556482031482248883127023777039667707976559857333357013727342079099064400455741830654320379350833236245819348824064783585692924881021978332974949906122664421376034687815350484991",
/* DR moduli */ /* DR moduli */
"14059105607947488696282932836518693308967803494693489478439861164411992439598399594747002144074658928593502845729752797260025831423419686528151609940203368612079", "14059105607947488696282932836518693308967803494693489478439861164411992439598399594747002144074658928593502845729752797260025831423419686528151609940203368612079",
"101745825697019260773923519755878567461315282017759829107608914364075275235254395622580447400994175578963163918967182013639660669771108475957692810857098847138903161308502419410142185759152435680068435915159402496058513611411688900243039", "101745825697019260773923519755878567461315282017759829107608914364075275235254395622580447400994175578963163918967182013639660669771108475957692810857098847138903161308502419410142185759152435680068435915159402496058513611411688900243039",
@ -196,6 +250,7 @@ int main(void)
}; };
log = fopen("logs/expt.log", "w"); log = fopen("logs/expt.log", "w");
logb = fopen("logs/expt_dr.log", "w"); logb = fopen("logs/expt_dr.log", "w");
logc = fopen("logs/expt_2k.log", "w");
for (n = 0; primes[n]; n++) { for (n = 0; primes[n]; n++) {
mp_read_radix(&a, primes[n], 10); mp_read_radix(&a, primes[n], 10);
mp_zero(&b); mp_zero(&b);
@ -224,11 +279,12 @@ int main(void)
exit(0); exit(0);
} }
printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt, tt); printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt, tt);
fprintf((n < 7) ? logb : log, "%d %9llu\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt); fprintf((n < 6) ? logc : (n < 13) ? logb : log, "%d %9llu\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
} }
} }
fclose(log); fclose(log);
fclose(logb); fclose(logb);
fclose(logc);
log = fopen("logs/invmod.log", "w"); log = fopen("logs/invmod.log", "w");
for (cnt = 4; cnt <= 128; cnt += 4) { for (cnt = 4; cnt <= 128; cnt += 4) {
@ -263,6 +319,12 @@ int main(void)
div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n = div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n =
sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = 0; sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = 0;
/* force KARA and TOOM to enable despite cutoffs */
KARATSUBA_SQR_CUTOFF = KARATSUBA_MUL_CUTOFF = 110;
TOOM_SQR_CUTOFF = TOOM_MUL_CUTOFF = 150;
for (;;) { for (;;) {
/* randomly clear and re-init one variable, this has the affect of triming the alloc space */ /* randomly clear and re-init one variable, this has the affect of triming the alloc space */

2
etc/2kprime.1 Normal file
View File

@ -0,0 +1,2 @@
256-bits (k = 36113) = 115792089237316195423570985008687907853269984665640564039457584007913129603823
512-bits (k = 38117) = 13407807929942597099574024998205846127479365820592393377723561443721764030073546976801874298166903427690031858186486050853753882811946569946433649006045979

80
etc/2kprime.c Normal file
View File

@ -0,0 +1,80 @@
/* Makes safe primes of a 2k nature */
#include <tommath.h>
#include <time.h>
int sizes[] = {256, 512, 768, 1024, 1536, 2048, 3072, 4096};
int main(void)
{
char buf[2000];
int x, y, t;
mp_int q, p;
FILE *out;
clock_t t1;
mp_digit z;
mp_init_multi(&q, &p, NULL);
out = fopen("2kprime.1", "w");
for (x = 0; x < (int)(sizeof(sizes) / sizeof(sizes[0])); x++) {
top:
mp_2expt(&q, sizes[x]);
mp_add_d(&q, 3, &q);
z = -3;
t1 = clock();
for(;;) {
mp_sub_d(&q, 4, &q);
z += 4;
if (z > MP_MASK) {
printf("No primes of size %d found\n", sizes[x]);
break;
}
if (clock() - t1 > CLOCKS_PER_SEC) {
printf("."); fflush(stdout);
// sleep((clock() - t1 + CLOCKS_PER_SEC/2)/CLOCKS_PER_SEC);
t1 = clock();
}
/* quick test on q */
mp_prime_is_prime(&q, 1, &y);
if (y == 0) {
continue;
}
/* find (q-1)/2 */
mp_sub_d(&q, 1, &p);
mp_div_2(&p, &p);
mp_prime_is_prime(&p, 3, &y);
if (y == 0) {
continue;
}
/* test on q */
mp_prime_is_prime(&q, 3, &y);
if (y == 0) {
continue;
}
break;
}
if (y == 0) {
++sizes[x];
goto top;
}
mp_toradix(&q, buf, 10);
printf("\n\n%d-bits (k = %lu) = %s\n", sizes[x], z, buf);
fprintf(out, "%d-bits (k = %lu) = %s\n", sizes[x], z, buf); fflush(out);
}
return 0;
}

View File

@ -32,9 +32,13 @@ mersenne: mersenne.o
drprime: drprime.o drprime: drprime.o
$(CC) drprime.o $(LIBNAME) -o drprime $(CC) drprime.o $(LIBNAME) -o drprime
# fines 2k safe primes for the given config
2kprime: 2kprime.o
$(CC) 2kprime.o $(LIBNAME) -o 2kprime
mont: mont.o mont: mont.o
$(CC) mont.o $(LIBNAME) -o mont $(CC) mont.o $(LIBNAME) -o mont
clean: clean:
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime

View File

@ -14,4 +14,7 @@ tune: tune.obj
cl tune.obj ../tommath.lib cl tune.obj ../tommath.lib
drprime: drprime.obj drprime: drprime.obj
cl drprime.obj ../tommath.lib cl drprime.obj ../tommath.lib
2kprime: 2kprime.obj
cl 2kprime.obj ../tommath.lib

View File

@ -8,10 +8,9 @@
int int
is_mersenne (long s, int *pp) is_mersenne (long s, int *pp)
{ {
mp_int n, u, mu; mp_int n, u;
int res, k; int res, k;
long ss;
*pp = 0; *pp = 0;
if ((res = mp_init (&n)) != MP_OKAY) { if ((res = mp_init (&n)) != MP_OKAY) {
@ -22,27 +21,14 @@ is_mersenne (long s, int *pp)
goto __N; goto __N;
} }
if ((res = mp_init (&mu)) != MP_OKAY) {
goto __U;
}
/* n = 2^s - 1 */ /* n = 2^s - 1 */
mp_set (&n, 1); if ((res = mp_2expt(&n, s)) != MP_OKAY) {
ss = s; goto __MU;
while (ss--) {
if ((res = mp_mul_2 (&n, &n)) != MP_OKAY) {
goto __MU;
}
} }
if ((res = mp_sub_d (&n, 1, &n)) != MP_OKAY) { if ((res = mp_sub_d (&n, 1, &n)) != MP_OKAY) {
goto __MU; goto __MU;
} }
/* setup mu */
if ((res = mp_reduce_setup (&mu, &n)) != MP_OKAY) {
goto __MU;
}
/* set u=4 */ /* set u=4 */
mp_set (&u, 4); mp_set (&u, 4);
@ -57,26 +43,26 @@ is_mersenne (long s, int *pp)
} }
/* make sure u is positive */ /* make sure u is positive */
if (u.sign == MP_NEG) { while (u.sign == MP_NEG) {
if ((res = mp_add (&u, &n, &u)) != MP_OKAY) { if ((res = mp_add (&u, &n, &u)) != MP_OKAY) {
goto __MU; goto __MU;
} }
} }
/* reduce */ /* reduce */
if ((res = mp_reduce (&u, &n, &mu)) != MP_OKAY) { if ((res = mp_reduce_2k (&u, &n, 1)) != MP_OKAY) {
goto __MU; goto __MU;
} }
} }
/* if u == 0 then its prime */ /* if u == 0 then its prime */
if (mp_iszero (&u) == 1) { if (mp_iszero (&u) == 1) {
*pp = 1; mp_prime_is_prime(&n, 3, pp);
if (*pp != 1) printf("FAILURE\n");
} }
res = MP_OKAY; res = MP_OKAY;
__MU:mp_clear (&mu); __MU:mp_clear (&u);
__U:mp_clear (&u);
__N:mp_clear (&n); __N:mp_clear (&n);
return res; return res;
} }

View File

@ -7,10 +7,11 @@ int main(void)
mp_digit mp; mp_digit mp;
long x, y; long x, y;
srand(time(NULL));
mp_init_multi(&modulus, &R, &p, &pp, NULL); mp_init_multi(&modulus, &R, &p, &pp, NULL);
/* loop through various sizes */ /* loop through various sizes */
for (x = 4; x < 128; x++) { for (x = 4; x < 256; x++) {
printf("DIGITS == %3ld...", x); fflush(stdout); printf("DIGITS == %3ld...", x); fflush(stdout);
/* make up the odd modulus */ /* make up the odd modulus */
@ -22,7 +23,7 @@ int main(void)
mp_montgomery_setup(&modulus, &mp); mp_montgomery_setup(&modulus, &mp);
/* now run through a bunch tests */ /* now run through a bunch tests */
for (y = 0; y < 100000; y++) { for (y = 0; y < 1000; y++) {
mp_rand(&p, x/2); /* p = random */ mp_rand(&p, x/2); /* p = random */
mp_mul(&p, &R, &pp); /* pp = R * p */ mp_mul(&p, &R, &pp); /* pp = R * p */
mp_montgomery_reduce(&pp, &modulus, mp); mp_montgomery_reduce(&pp, &modulus, mp);

View File

@ -8,17 +8,17 @@
#ifndef X86_TIMER #ifndef X86_TIMER
/* generic ISO C timer */ /* generic ISO C timer */
unsigned long long __T; ulong64 __T;
void t_start(void) { __T = clock(); } void t_start(void) { __T = clock(); }
unsigned long long t_read(void) { return clock() - __T; } ulong64 t_read(void) { return clock() - __T; }
#else #else
extern void t_start(void); extern void t_start(void);
extern unsigned long long t_read(void); extern ulong64 t_read(void);
#endif #endif
unsigned long long ulong64
time_mult (void) time_mult (int max)
{ {
int x, y; int x, y;
mp_int a, b, c; mp_int a, b, c;
@ -28,7 +28,7 @@ time_mult (void)
mp_init (&c); mp_init (&c);
t_start(); t_start();
for (x = 32; x <= 288; x += 4) { for (x = 32; x <= max; x += 4) {
mp_rand (&a, x); mp_rand (&a, x);
mp_rand (&b, x); mp_rand (&b, x);
for (y = 0; y < 100; y++) { for (y = 0; y < 100; y++) {
@ -41,8 +41,8 @@ time_mult (void)
return t_read(); return t_read();
} }
unsigned long long ulong64
time_sqr (void) time_sqr (int max)
{ {
int x, y; int x, y;
mp_int a, b; mp_int a, b;
@ -51,7 +51,7 @@ time_sqr (void)
mp_init (&b); mp_init (&b);
t_start(); t_start();
for (x = 32; x <= 288; x += 4) { for (x = 32; x <= max; x += 4) {
mp_rand (&a, x); mp_rand (&a, x);
for (y = 0; y < 100; y++) { for (y = 0; y < 100; y++) {
mp_sqr (&a, &b); mp_sqr (&a, &b);
@ -65,45 +65,85 @@ time_sqr (void)
int int
main (void) main (void)
{ {
int best_mult, best_square; int best_kmult, best_tmult, best_ksquare, best_tsquare;
unsigned long long best, ti; ulong64 best, ti;
FILE *log; FILE *log;
best_mult = best_square = 0; best_kmult = best_ksquare = best_tmult = best_tsquare = 0;
/* tune multiplication first */ /* tune multiplication first */
/* effectively turn TOOM off */
TOOM_SQR_CUTOFF = TOOM_MUL_CUTOFF = 100000;
log = fopen ("mult.log", "w"); log = fopen ("mult.log", "w");
best = -1; best = -1;
for (KARATSUBA_MUL_CUTOFF = 8; KARATSUBA_MUL_CUTOFF <= 200; KARATSUBA_MUL_CUTOFF++) { for (KARATSUBA_MUL_CUTOFF = 8; KARATSUBA_MUL_CUTOFF <= 200; KARATSUBA_MUL_CUTOFF++) {
ti = time_mult (); ti = time_mult (300);
printf ("%4d : %9llu\r", KARATSUBA_MUL_CUTOFF, ti); printf ("%4d : %9llu\r", KARATSUBA_MUL_CUTOFF, ti);
fprintf (log, "%d, %llu\n", KARATSUBA_MUL_CUTOFF, ti); fprintf (log, "%d, %llu\n", KARATSUBA_MUL_CUTOFF, ti);
fflush (stdout); fflush (stdout);
if (ti < best) { if (ti < best) {
printf ("New best: %llu, %d \n", ti, KARATSUBA_MUL_CUTOFF); printf ("New best: %llu, %d \n", ti, KARATSUBA_MUL_CUTOFF);
best = ti; best = ti;
best_mult = KARATSUBA_MUL_CUTOFF; best_kmult = KARATSUBA_MUL_CUTOFF;
} }
} }
fclose (log); fclose (log);
/* tune squaring */ /* tune squaring */
log = fopen ("sqr.log", "w"); log = fopen ("sqr.log", "w");
best = -1; best = -1;
for (KARATSUBA_SQR_CUTOFF = 8; KARATSUBA_SQR_CUTOFF <= 200; KARATSUBA_SQR_CUTOFF++) { for (KARATSUBA_SQR_CUTOFF = 8; KARATSUBA_SQR_CUTOFF <= 200; KARATSUBA_SQR_CUTOFF++) {
ti = time_sqr (); ti = time_sqr (300);
printf ("%4d : %9llu\r", KARATSUBA_SQR_CUTOFF, ti); printf ("%4d : %9llu\r", KARATSUBA_SQR_CUTOFF, ti);
fprintf (log, "%d, %llu\n", KARATSUBA_SQR_CUTOFF, ti); fprintf (log, "%d, %llu\n", KARATSUBA_SQR_CUTOFF, ti);
fflush (stdout); fflush (stdout);
if (ti < best) { if (ti < best) {
printf ("New best: %llu, %d \n", ti, KARATSUBA_SQR_CUTOFF); printf ("New best: %llu, %d \n", ti, KARATSUBA_SQR_CUTOFF);
best = ti; best = ti;
best_square = KARATSUBA_SQR_CUTOFF; best_ksquare = KARATSUBA_SQR_CUTOFF;
} }
} }
fclose (log); fclose (log);
KARATSUBA_MUL_CUTOFF = best_kmult;
KARATSUBA_SQR_CUTOFF = best_ksquare;
/* tune TOOM mult */
log = fopen ("tmult.log", "w");
best = -1;
for (TOOM_MUL_CUTOFF = best_kmult*5; TOOM_MUL_CUTOFF <= 800; TOOM_MUL_CUTOFF++) {
ti = time_mult (1200);
printf ("%4d : %9llu\r", TOOM_MUL_CUTOFF, ti);
fprintf (log, "%d, %llu\n", TOOM_MUL_CUTOFF, ti);
fflush (stdout);
if (ti < best) {
printf ("New best: %llu, %d \n", ti, TOOM_MUL_CUTOFF);
best = ti;
best_tmult = TOOM_MUL_CUTOFF;
}
}
fclose (log);
/* tune TOOM sqr */
log = fopen ("tsqr.log", "w");
best = -1;
for (TOOM_SQR_CUTOFF = best_ksquare*3; TOOM_SQR_CUTOFF <= 800; TOOM_SQR_CUTOFF++) {
ti = time_sqr (1200);
printf ("%4d : %9llu\r", TOOM_SQR_CUTOFF, ti);
fprintf (log, "%d, %llu\n", TOOM_SQR_CUTOFF, ti);
fflush (stdout);
if (ti < best) {
printf ("New best: %llu, %d \n", ti, TOOM_SQR_CUTOFF);
best = ti;
best_tsquare = TOOM_SQR_CUTOFF;
}
}
fclose (log);
printf printf
("\n\n\nKaratsuba Multiplier Cutoff: %d\nKaratsuba Squaring Cutoff: %d\n", ("\n\n\nKaratsuba Multiplier Cutoff: %d\nKaratsuba Squaring Cutoff: %d\nToom Multiplier Cutoff: %d\nToom Squaring Cutoff: %d\n",
best_mult, best_square); best_kmult, best_ksquare, best_tmult, best_tsquare);
return 0; return 0;
} }

4
gen.pl
View File

@ -6,7 +6,7 @@
use strict; use strict;
open( OUT, ">mpi.c" ) or die "Couldn't open mpi.c for writing: $!"; open( OUT, ">mpi.c" ) or die "Couldn't open mpi.c for writing: $!";
foreach my $filename (glob "bn_*.c") { foreach my $filename (glob "bn*.c") {
open( SRC, "<$filename" ) or die "Couldn't open $filename for reading: $!"; open( SRC, "<$filename" ) or die "Couldn't open $filename for reading: $!";
print OUT "/* Start: $filename */\n"; print OUT "/* Start: $filename */\n";
print OUT qq[#line 0 "$filename"\n]; print OUT qq[#line 0 "$filename"\n];
@ -14,5 +14,5 @@ foreach my $filename (glob "bn_*.c") {
print OUT "\n/* End: $filename */\n\n"; print OUT "\n/* End: $filename */\n\n";
close SRC or die "Error closing $filename after reading: $!"; close SRC or die "Error closing $filename after reading: $!";
} }
print OUT "\b/* EOF */\n"; print OUT "\n/* EOF */\n";
close OUT or die "Error closing mpi.c after writing: $!"; close OUT or die "Error closing mpi.c after writing: $!";

View File

@ -1,16 +1,16 @@
224 11039864 224 11069160
448 9206336 448 9156136
672 8178200 672 8089755
896 7432176 896 7399424
1120 6433264 1120 6389352
1344 5847056 1344 5818648
1568 5270184 1568 5257112
1792 4943416 1792 4982160
2016 4520016 2016 4527856
2240 4256168 2240 4325312
2464 3999224 2464 4051760
2688 3714896 2688 3767640
2912 3572720 2912 3612520
3136 3340176 3136 3415208
3360 3222584 3360 3258656
3584 3036336 3584 3113360

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 KiB

After

Width:  |  Height:  |  Size: 6.8 KiB

View File

@ -1,7 +1,7 @@
14364 666 513 680
21532 253 769 257
28700 117 1025 117
57372 17 2049 17
71708 9 2561 9
86044 5 3073 5
114716 2 4097 2

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.5 KiB

After

Width:  |  Height:  |  Size: 7.2 KiB

6
logs/expt_2k.log Normal file
View File

@ -0,0 +1,6 @@
521 736
607 552
1279 112
2203 33
3217 13
4253 6

View File

@ -1,7 +1,7 @@
14896 1088 532 1064
21952 468 784 460
29008 244 1036 240
43120 91 1540 91
58016 43 2072 43
86240 15 3080 15
115248 6 4116 6

View File

@ -1,5 +1,5 @@
set terminal png color set terminal png color
set size 1.5 set size 1.75
set ylabel "Operations per Second" set ylabel "Operations per Second"
set xlabel "Operand size (bits)" set xlabel "Operand size (bits)"
@ -10,7 +10,7 @@ set output "mult.png"
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)" plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
set output "expt.png" set output "expt.png"
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)" plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)", 'expt_2k.log' smooth bezier title "Exptmod (2k Reduction)"
set output "invmod.png" set output "invmod.png"
plot 'invmod.log' smooth bezier title "Modular Inverse" plot 'invmod.log' smooth bezier title "Modular Inverse"

View File

@ -1,32 +1,32 @@
112 15608 112 16248
224 7840 224 8192
336 5104 336 5320
448 3376 448 3560
560 2616 560 2728
672 1984 672 2064
784 1640 784 1704
896 2056 896 2176
1008 1136 1008 1184
1120 936 1120 976
1232 1240 1232 1280
1344 1112 1344 1176
1456 608 1456 624
1568 873 1568 912
1680 492 1680 504
1792 444 1792 452
1904 640 1904 658
2016 584 2016 608
2128 328 2128 336
2240 307 2240 312
2352 283 2352 288
2464 256 2464 264
2576 393 2576 408
2688 365 2688 376
2800 344 2800 354
2912 196 2912 198
3024 301 3024 307
3136 170 3136 173
3248 160 3248 162
3360 250 3360 256
3472 144 3472 145
3584 224 3584 226

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.7 KiB

After

Width:  |  Height:  |  Size: 5.6 KiB

13
logs/k7/README Normal file
View File

@ -0,0 +1,13 @@
To use the pretty graphs you have to first build/run the ltmtest from the root directory of the package.
Todo this type
make timing ; ltmtest
in the root. It will run for a while [about ten minutes on most PCs] and produce a series of .log files in logs/.
After doing that run "gnuplot graphs.dem" to make the PNGs. If you managed todo that all so far just open index.html to view
them all :-)
Have fun
Tom

16
logs/k7/add.log Normal file
View File

@ -0,0 +1,16 @@
224 11069160
448 9156136
672 8089755
896 7399424
1120 6389352
1344 5818648
1568 5257112
1792 4982160
2016 4527856
2240 4325312
2464 4051760
2688 3767640
2912 3612520
3136 3415208
3360 3258656
3584 3113360

BIN
logs/k7/addsub.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

7
logs/k7/expt.log Normal file
View File

@ -0,0 +1,7 @@
513 664
769 256
1025 117
2049 17
2561 9
3073 5
4097 2

BIN
logs/k7/expt.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.3 KiB

7
logs/k7/expt_dr.log Normal file
View File

@ -0,0 +1,7 @@
532 1088
784 460
1036 240
1540 92
2072 43
3080 15
4116 6

17
logs/k7/graphs.dem Normal file
View File

@ -0,0 +1,17 @@
set terminal png color
set size 1.75
set ylabel "Operations per Second"
set xlabel "Operand size (bits)"
set output "addsub.png"
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
set output "mult.png"
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
set output "expt.png"
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)"
set output "invmod.png"
plot 'invmod.log' smooth bezier title "Modular Inverse"

24
logs/k7/index.html Normal file
View File

@ -0,0 +1,24 @@
<html>
<head>
<title>LibTomMath Log Plots</title>
</head>
<body>
<h1>Addition and Subtraction</h1>
<center><img src=addsub.png></center>
<hr>
<h1>Multipliers</h1>
<center><img src=mult.png></center>
<hr>
<h1>Exptmod</h1>
<center><img src=expt.png></center>
<hr>
<h1>Modular Inverse</h1>
<center><img src=invmod.png></center>
<hr>
</body>
</html>

32
logs/k7/invmod.log Normal file
View File

@ -0,0 +1,32 @@
112 16248
224 8192
336 5320
448 3560
560 2728
672 2064
784 1704
896 2176
1008 1184
1120 976
1232 1280
1344 1176
1456 624
1568 912
1680 504
1792 452
1904 658
2016 608
2128 336
2240 312
2352 288
2464 264
2576 408
2688 376
2800 354
2912 198
3024 307
3136 173
3248 162
3360 256
3472 145
3584 226

BIN
logs/k7/invmod.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.6 KiB

17
logs/k7/mult.log Normal file
View File

@ -0,0 +1,17 @@
896 322904
1344 151592
1792 90472
2240 59984
2688 42624
3136 31872
3584 24704
4032 19704
4480 16096
4928 13376
5376 11272
5824 9616
6272 8360
6720 7304
7168 1664
7616 1472
8064 1328

BIN
logs/k7/mult.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.1 KiB

17
logs/k7/mult_kara.log Normal file
View File

@ -0,0 +1,17 @@
896 322872
1344 151688
1792 90480
2240 59984
2688 42656
3136 32144
3584 25840
4032 21328
4480 17856
4928 14928
5376 12856
5824 11256
6272 9880
6720 8984
7168 7928
7616 7200
8064 6576

17
logs/k7/sqr.log Normal file
View File

@ -0,0 +1,17 @@
896 415472
1344 223736
1792 141232
2240 97624
2688 71400
3136 54800
3584 16904
4032 13528
4480 10968
4928 9128
5376 7784
5824 6672
6272 5760
6720 5056
7168 4440
7616 3952
8064 3512

17
logs/k7/sqr_kara.log Normal file
View File

@ -0,0 +1,17 @@
896 420464
1344 224800
1792 142808
2240 97704
2688 71416
3136 54504
3584 38320
4032 32360
4480 27576
4928 23840
5376 20688
5824 18264
6272 16176
6720 14440
7168 11688
7616 10752
8064 9936

16
logs/k7/sub.log Normal file
View File

@ -0,0 +1,16 @@
224 9728504
448 8573648
672 7488096
896 6714064
1120 5950472
1344 5457400
1568 5038896
1792 4683632
2016 4384656
2240 4105976
2464 3871608
2688 3650680
2912 3463552
3136 3290016
3360 3135272
3584 2993848

View File

@ -1,17 +1,17 @@
896 321504 896 322904
1344 150784 1344 151592
1792 90288 1792 90472
2240 59760 2240 59984
2688 42480 2688 42624
3136 32056 3136 31872
3584 24600 3584 24704
4032 19656 4032 19704
4480 16024 4480 16096
4928 13328 4928 13376
5376 11280 5376 11272
5824 9624 5824 9616
6272 8336 6272 8360
6720 7280 6720 7304
7168 1648 7168 1664
7616 1464 7616 1472
8064 1296 8064 1328

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.9 KiB

After

Width:  |  Height:  |  Size: 8.1 KiB

View File

@ -1,17 +1,17 @@
896 321928 896 322872
1344 150752 1344 151688
1792 90136 1792 90480
2240 59888 2240 59984
2688 42480 2688 42656
3136 32080 3136 32144
3584 25744 3584 25840
4032 21216 4032 21328
4480 17912 4480 17856
4928 14896 4928 14928
5376 12936 5376 12856
5824 11216 5824 11256
6272 9848 6272 9880
6720 8896 6720 8984
7168 7968 7168 7928
7616 7248 7616 7200
8064 6600 8064 6576

13
logs/p4/README Normal file
View File

@ -0,0 +1,13 @@
To use the pretty graphs you have to first build/run the ltmtest from the root directory of the package.
Todo this type
make timing ; ltmtest
in the root. It will run for a while [about ten minutes on most PCs] and produce a series of .log files in logs/.
After doing that run "gnuplot graphs.dem" to make the PNGs. If you managed todo that all so far just open index.html to view
them all :-)
Have fun
Tom

16
logs/p4/add.log Normal file
View File

@ -0,0 +1,16 @@
224 8113248
448 6585584
672 5687678
896 4761144
1120 4111592
1344 3995154
1568 3532387
1792 3225400
2016 2963960
2240 2720112
2464 2533952
2688 2307168
2912 2287064
3136 2150160
3360 2035992
3584 1936304

BIN
logs/p4/addsub.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.7 KiB

7
logs/p4/expt.log Normal file
View File

@ -0,0 +1,7 @@
513 195
769 68
1025 31
2049 4
2561 2
3073 1
4097 0

BIN
logs/p4/expt.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.4 KiB

7
logs/p4/expt_dr.log Normal file
View File

@ -0,0 +1,7 @@
532 393
784 158
1036 79
1540 27
2072 12
3080 4
4116 1

17
logs/p4/graphs.dem Normal file
View File

@ -0,0 +1,17 @@
set terminal png color
set size 1.75
set ylabel "Operations per Second"
set xlabel "Operand size (bits)"
set output "addsub.png"
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
set output "mult.png"
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
set output "expt.png"
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)"
set output "invmod.png"
plot 'invmod.log' smooth bezier title "Modular Inverse"

24
logs/p4/index.html Normal file
View File

@ -0,0 +1,24 @@
<html>
<head>
<title>LibTomMath Log Plots</title>
</head>
<body>
<h1>Addition and Subtraction</h1>
<center><img src=addsub.png></center>
<hr>
<h1>Multipliers</h1>
<center><img src=mult.png></center>
<hr>
<h1>Exptmod</h1>
<center><img src=expt.png></center>
<hr>
<h1>Modular Inverse</h1>
<center><img src=invmod.png></center>
<hr>
</body>
</html>

32
logs/p4/invmod.log Normal file
View File

@ -0,0 +1,32 @@
112 13608
224 6872
336 4264
448 2792
560 2144
672 1560
784 1296
896 1672
1008 896
1120 736
1232 1024
1344 888
1456 472
1568 680
1680 373
1792 328
1904 484
2016 436
2128 232
2240 211
2352 200
2464 177
2576 293
2688 262
2800 251
2912 137
3024 216
3136 117
3248 113
3360 181
3472 98
3584 158

BIN
logs/p4/invmod.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

17
logs/p4/mult.log Normal file
View File

@ -0,0 +1,17 @@
896 77600
1344 35776
1792 19688
2240 13248
2688 9424
3136 7056
3584 5464
4032 4368
4480 3568
4928 2976
5376 2520
5824 2152
6272 1872
6720 1632
7168 650
7616 576
8064 515

BIN
logs/p4/mult.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.9 KiB

17
logs/p4/mult_kara.log Normal file
View File

@ -0,0 +1,17 @@
896 77752
1344 35832
1792 19688
2240 14704
2688 10832
3136 8336
3584 6600
4032 5424
4480 4648
4928 3976
5376 3448
5824 3016
6272 2664
6720 2384
7168 2120
7616 1912
8064 1752

17
logs/p4/sqr.log Normal file
View File

@ -0,0 +1,17 @@
896 128088
1344 63640
1792 37968
2240 25488
2688 18176
3136 13672
3584 4920
4032 3912
4480 3160
4928 2616
5376 2216
5824 1896
6272 1624
6720 1408
7168 1240
7616 1096
8064 984

17
logs/p4/sqr_kara.log Normal file
View File

@ -0,0 +1,17 @@
896 127456
1344 63752
1792 37920
2240 25440
2688 18200
3136 13728
3584 10968
4032 9072
4480 7608
4928 6440
5376 5528
5824 4768
6272 4328
6720 3888
7168 3504
7616 3176
8064 2896

16
logs/p4/sub.log Normal file
View File

@ -0,0 +1,16 @@
224 7355896
448 6162880
672 5218984
896 4622776
1120 3999320
1344 3629480
1568 3290384
1792 2954752
2016 2737056
2240 2563320
2464 2451928
2688 2310920
2912 2139048
3136 2034080
3360 1890800
3584 1808624

View File

@ -1,17 +1,17 @@
896 416968 896 415472
1344 223672 1344 223736
1792 141552 1792 141232
2240 97280 2240 97624
2688 71304 2688 71400
3136 54648 3136 54800
3584 16264 3584 16904
4032 13000 4032 13528
4480 10528 4480 10968
4928 8776 4928 9128
5376 7464 5376 7784
5824 6440 5824 6672
6272 5520 6272 5760
6720 4808 6720 5056
7168 4264 7168 4440
7616 3784 7616 3952
8064 3368 8064 3512

View File

@ -1,17 +1,17 @@
896 416656 896 420464
1344 223728 1344 224800
1792 141288 1792 142808
2240 97456 2240 97704
2688 71152 2688 71416
3136 54392 3136 54504
3584 38552 3584 38320
4032 32216 4032 32360
4480 27384 4480 27576
4928 23792 4928 23840
5376 20728 5376 20688
5824 18232 5824 18264
6272 16160 6272 16176
6720 14408 6720 14440
7168 11696 7168 11688
7616 10768 7616 10752
8064 9920 8064 9936

View File

@ -1,16 +1,16 @@
224 9862520 224 9728504
448 8562344 448 8573648
672 7661400 672 7488096
896 6838128 896 6714064
1120 5911144 1120 5950472
1344 5394040 1344 5457400
1568 4993760 1568 5038896
1792 4624240 1792 4683632
2016 4332024 2016 4384656
2240 4029312 2240 4105976
2464 3790784 2464 3871608
2688 3587216 2688 3650680
2912 3397952 2912 3463552
3136 3239736 3136 3290016
3360 3080616 3360 3135272
3584 2933104 3584 2993848

View File

@ -1,6 +1,6 @@
CFLAGS += -I./ -Wall -W -Wshadow -O3 -fomit-frame-pointer -funroll-loops CFLAGS += -I./ -Wall -W -Wshadow -O3 -fomit-frame-pointer -funroll-loops
VERSION=0.17 VERSION=0.18
default: libtommath.a default: libtommath.a
@ -33,7 +33,9 @@ bn_mp_to_signed_bin.o bn_mp_unsigned_bin_size.o bn_mp_signed_bin_size.o bn_radix
bn_mp_xor.o bn_mp_and.o bn_mp_or.o bn_mp_rand.o bn_mp_montgomery_calc_normalization.o \ bn_mp_xor.o bn_mp_and.o bn_mp_or.o bn_mp_rand.o bn_mp_montgomery_calc_normalization.o \
bn_mp_prime_is_divisible.o bn_prime_tab.o bn_mp_prime_fermat.o bn_mp_prime_miller_rabin.o \ bn_mp_prime_is_divisible.o bn_prime_tab.o bn_mp_prime_fermat.o bn_mp_prime_miller_rabin.o \
bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o bn_mp_multi.o \ bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o bn_mp_multi.o \
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o bn_mp_dr_is_modulus.o bn_mp_dr_setup.o bn_mp_reduce_setup.o \
bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \
bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o
libtommath.a: $(OBJECTS) libtommath.a: $(OBJECTS)
$(AR) $(ARFLAGS) libtommath.a $(OBJECTS) $(AR) $(ARFLAGS) libtommath.a $(OBJECTS)
@ -63,6 +65,11 @@ docdvi: tommath.src
makeindex tommath makeindex tommath
latex tommath > /dev/null latex tommath > /dev/null
# poster, makes the single page PDF poster
poster: poster.tex
pdflatex poster
rm -f poster.aux poster.log
# makes the LTM book PS/PDF file, requires tetex, cleans up the LaTeX temp files # makes the LTM book PS/PDF file, requires tetex, cleans up the LaTeX temp files
docs: docs:
cd pics ; make pdfes cd pics ; make pdfes
@ -88,11 +95,12 @@ manual:
clean: clean:
rm -f *.pdf *.o *.a *.obj *.lib *.exe etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \ rm -f *.pdf *.o *.a *.obj *.lib *.exe etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \
tommath.idx tommath.toc tommath.log tommath.aux tommath.dvi tommath.lof tommath.ind tommath.ilg *.ps *.pdf *.log *.s mpi.c tommath.idx tommath.toc tommath.log tommath.aux tommath.dvi tommath.lof tommath.ind tommath.ilg *.ps *.pdf *.log *.s mpi.c \
poster.aux poster.dvi poster.log
cd etc ; make clean cd etc ; make clean
cd pics ; make clean cd pics ; make clean
zipup: clean manual zipup: clean manual poster
perl gen.pl ; mv mpi.c pre_gen/ ; \ perl gen.pl ; mv mpi.c pre_gen/ ; \
cd .. ; rm -rf ltm* libtommath-$(VERSION) ; mkdir libtommath-$(VERSION) ; \ cd .. ; rm -rf ltm* libtommath-$(VERSION) ; mkdir libtommath-$(VERSION) ; \
cp -R ./libtommath/* ./libtommath-$(VERSION)/ ; tar -c libtommath-$(VERSION)/* > ltm-$(VERSION).tar ; \ cp -R ./libtommath/* ./libtommath-$(VERSION)/ ; tar -c libtommath-$(VERSION)/* > ltm-$(VERSION).tar ; \

37
makefile.bcc Normal file
View File

@ -0,0 +1,37 @@
#
# Borland C++Builder Makefile (makefile.bcc)
#
LIB = tlib
CC = bcc32
CFLAGS = -c -O2 -I.
OBJECTS=bncore.obj bn_mp_init.obj bn_mp_clear.obj bn_mp_exch.obj bn_mp_grow.obj bn_mp_shrink.obj \
bn_mp_clamp.obj bn_mp_zero.obj bn_mp_set.obj bn_mp_set_int.obj bn_mp_init_size.obj bn_mp_copy.obj \
bn_mp_init_copy.obj bn_mp_abs.obj bn_mp_neg.obj bn_mp_cmp_mag.obj bn_mp_cmp.obj bn_mp_cmp_d.obj \
bn_mp_rshd.obj bn_mp_lshd.obj bn_mp_mod_2d.obj bn_mp_div_2d.obj bn_mp_mul_2d.obj bn_mp_div_2.obj \
bn_mp_mul_2.obj bn_s_mp_add.obj bn_s_mp_sub.obj bn_fast_s_mp_mul_digs.obj bn_s_mp_mul_digs.obj \
bn_fast_s_mp_mul_high_digs.obj bn_s_mp_mul_high_digs.obj bn_fast_s_mp_sqr.obj bn_s_mp_sqr.obj \
bn_mp_add.obj bn_mp_sub.obj bn_mp_karatsuba_mul.obj bn_mp_mul.obj bn_mp_karatsuba_sqr.obj \
bn_mp_sqr.obj bn_mp_div.obj bn_mp_mod.obj bn_mp_add_d.obj bn_mp_sub_d.obj bn_mp_mul_d.obj \
bn_mp_div_d.obj bn_mp_mod_d.obj bn_mp_expt_d.obj bn_mp_addmod.obj bn_mp_submod.obj \
bn_mp_mulmod.obj bn_mp_sqrmod.obj bn_mp_gcd.obj bn_mp_lcm.obj bn_fast_mp_invmod.obj bn_mp_invmod.obj \
bn_mp_reduce.obj bn_mp_montgomery_setup.obj bn_fast_mp_montgomery_reduce.obj bn_mp_montgomery_reduce.obj \
bn_mp_exptmod_fast.obj bn_mp_exptmod.obj bn_mp_2expt.obj bn_mp_n_root.obj bn_mp_jacobi.obj bn_reverse.obj \
bn_mp_count_bits.obj bn_mp_read_unsigned_bin.obj bn_mp_read_signed_bin.obj bn_mp_to_unsigned_bin.obj \
bn_mp_to_signed_bin.obj bn_mp_unsigned_bin_size.obj bn_mp_signed_bin_size.obj bn_radix.obj \
bn_mp_xor.obj bn_mp_and.obj bn_mp_or.obj bn_mp_rand.obj bn_mp_montgomery_calc_normalization.obj \
bn_mp_prime_is_divisible.obj bn_prime_tab.obj bn_mp_prime_fermat.obj bn_mp_prime_miller_rabin.obj \
bn_mp_prime_is_prime.obj bn_mp_prime_next_prime.obj bn_mp_dr_reduce.obj bn_mp_multi.obj \
bn_mp_dr_is_modulus.obj bn_mp_dr_setup.obj bn_mp_reduce_setup.obj \
bn_mp_toom_mul.obj bn_mp_toom_sqr.obj bn_mp_div_3.obj bn_s_mp_exptmod.obj \
bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj
TARGET = libtommath.lib
$(TARGET): $(OBJECTS)
.c.objbj:
$(CC) $(CFLAGS) $<
$(LIB) $(TARGET) -+$@

View File

@ -23,7 +23,10 @@ bn_mp_to_signed_bin.obj bn_mp_unsigned_bin_size.obj bn_mp_signed_bin_size.obj bn
bn_mp_xor.obj bn_mp_and.obj bn_mp_or.obj bn_mp_rand.obj bn_mp_montgomery_calc_normalization.obj \ bn_mp_xor.obj bn_mp_and.obj bn_mp_or.obj bn_mp_rand.obj bn_mp_montgomery_calc_normalization.obj \
bn_mp_prime_is_divisible.obj bn_prime_tab.obj bn_mp_prime_fermat.obj bn_mp_prime_miller_rabin.obj \ bn_mp_prime_is_divisible.obj bn_prime_tab.obj bn_mp_prime_fermat.obj bn_mp_prime_miller_rabin.obj \
bn_mp_prime_is_prime.obj bn_mp_prime_next_prime.obj bn_mp_dr_reduce.obj bn_mp_multi.obj \ bn_mp_prime_is_prime.obj bn_mp_prime_next_prime.obj bn_mp_dr_reduce.obj bn_mp_multi.obj \
bn_mp_dr_is_modulus.obj bn_mp_dr_setup.obj bn_mp_dr_is_modulus.obj bn_mp_dr_setup.obj bn_mp_reduce_setup.obj \
bn_mp_toom_mul.obj bn_mp_toom_sqr.obj bn_mp_div_3.obj bn_s_mp_exptmod.obj \
bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj
library: $(OBJECTS) library: $(OBJECTS)

View File

@ -40,14 +40,10 @@ void rand_num(mp_int *a)
int n, size; int n, size;
unsigned char buf[2048]; unsigned char buf[2048];
top: size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 1031;
size = 1 + ((fgetc(rng)*fgetc(rng)) % 1024);
buf[0] = (fgetc(rng)&1)?1:0; buf[0] = (fgetc(rng)&1)?1:0;
fread(buf+1, 1, size, rng); fread(buf+1, 1, size, rng);
for (n = 0; n < size; n++) { while (buf[1] == 0) buf[1] = fgetc(rng);
if (buf[n+1]) break;
}
if (n == size) goto top;
mp_read_raw(a, buf, 1+size); mp_read_raw(a, buf, 1+size);
} }
@ -56,14 +52,10 @@ void rand_num2(mp_int *a)
int n, size; int n, size;
unsigned char buf[2048]; unsigned char buf[2048];
top: size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 97;
size = 1 + ((fgetc(rng)*fgetc(rng)) % 96);
buf[0] = (fgetc(rng)&1)?1:0; buf[0] = (fgetc(rng)&1)?1:0;
fread(buf+1, 1, size, rng); fread(buf+1, 1, size, rng);
for (n = 0; n < size; n++) { while (buf[1] == 0) buf[1] = fgetc(rng);
if (buf[n+1]) break;
}
if (n == size) goto top;
mp_read_raw(a, buf, 1+size); mp_read_raw(a, buf, 1+size);
} }
@ -73,6 +65,7 @@ int main(void)
{ {
int n; int n;
mp_int a, b, c, d, e; mp_int a, b, c, d, e;
clock_t t1;
char buf[4096]; char buf[4096];
mp_init(&a); mp_init(&a);
@ -108,8 +101,14 @@ int main(void)
} }
} }
t1 = clock();
for (;;) { for (;;) {
n = fgetc(rng) % 13; if (clock() - t1 > CLOCKS_PER_SEC) {
sleep(1);
t1 = clock();
}
n = fgetc(rng) % 13;
if (n == 0) { if (n == 0) {
/* add tests */ /* add tests */
@ -227,6 +226,7 @@ int main(void)
rand_num2(&a); rand_num2(&a);
rand_num2(&b); rand_num2(&b);
rand_num2(&c); rand_num2(&c);
// if (c.dp[0]&1) mp_add_d(&c, 1, &c);
a.sign = b.sign = c.sign = 0; a.sign = b.sign = c.sign = 0;
mp_exptmod(&a, &b, &c, &d); mp_exptmod(&a, &b, &c, &d);
printf("expt\n"); printf("expt\n");

BIN
pics/expt_state.sxd Normal file

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More