added libtommath-0.18

This commit is contained in:
Tom St Denis 2003-05-29 13:35:26 +00:00 committed by Steffen Jaeckel
parent fd181cc841
commit 0ef44cea9b
108 changed files with 9777 additions and 2427 deletions

BIN
bn.pdf

Binary file not shown.

2
bn.tex
View File

@ -1,7 +1,7 @@
\documentclass[]{article}
\begin{document}
\title{LibTomMath v0.17 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org }
\title{LibTomMath v0.18 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org }
\author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle
\newpage

View File

@ -14,7 +14,7 @@
*/
#include <tommath.h>
/* computes xR^-1 == x (mod N) via Montgomery Reduction
/* computes xR**-1 == x (mod N) via Montgomery Reduction
*
* This is an optimized implementation of mp_montgomery_reduce
* which uses the comba method to quickly calculate the columns of the
@ -23,76 +23,77 @@
* Based on Algorithm 14.32 on pp.601 of HAC.
*/
int
fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
{
int ix, res, olduse;
mp_word W[MP_WARRAY];
/* get old used count */
olduse = a->used;
olduse = x->used;
/* grow a as required */
if (a->alloc < m->used + 1) {
if ((res = mp_grow (a, m->used + 1)) != MP_OKAY) {
if (x->alloc < n->used + 1) {
if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
return res;
}
}
{
register mp_word *_W;
register mp_digit *tmpa;
register mp_digit *tmpx;
_W = W;
tmpa = a->dp;
tmpx = x->dp;
/* copy the digits of a into W[0..a->used-1] */
for (ix = 0; ix < a->used; ix++) {
*_W++ = *tmpa++;
for (ix = 0; ix < x->used; ix++) {
*_W++ = *tmpx++;
}
/* zero the high words of W[a->used..m->used*2] */
for (; ix < m->used * 2 + 1; ix++) {
for (; ix < n->used * 2 + 1; ix++) {
*_W++ = 0;
}
}
for (ix = 0; ix < m->used; ix++) {
/* ui = ai * m' mod b
for (ix = 0; ix < n->used; ix++) {
/* mu = ai * m' mod b
*
* We avoid a double precision multiplication (which isn't required)
* by casting the value down to a mp_digit. Note this requires that W[ix-1] have
* the carry cleared (see after the inner loop)
* by casting the value down to a mp_digit. Note this requires
* that W[ix-1] have the carry cleared (see after the inner loop)
*/
register mp_digit ui;
ui = (((mp_digit) (W[ix] & MP_MASK)) * mp) & MP_MASK;
register mp_digit mu;
mu = (((mp_digit) (W[ix] & MP_MASK)) * rho) & MP_MASK;
/* a = a + ui * m * b^i
/* a = a + mu * m * b**i
*
* This is computed in place and on the fly. The multiplication
* by b^i is handled by offseting which columns the results
* by b**i is handled by offseting which columns the results
* are added to.
*
* Note the comba method normally doesn't handle carries in the inner loop
* In this case we fix the carry from the previous column since the Montgomery
* reduction requires digits of the result (so far) [see above] to work. This is
* handled by fixing up one carry after the inner loop. The carry fixups are done
* in order so after these loops the first m->used words of W[] have the carries
* fixed
* Note the comba method normally doesn't handle carries in the
* inner loop In this case we fix the carry from the previous
* column since the Montgomery reduction requires digits of the
* result (so far) [see above] to work. This is
* handled by fixing up one carry after the inner loop. The
* carry fixups are done in order so after these loops the
* first m->used words of W[] have the carries fixed
*/
{
register int iy;
register mp_digit *tmpx;
register mp_digit *tmpn;
register mp_word *_W;
/* alias for the digits of the modulus */
tmpx = m->dp;
tmpn = n->dp;
/* Alias for the columns set by an offset of ix */
_W = W + ix;
/* inner loop */
for (iy = 0; iy < m->used; iy++) {
*_W++ += ((mp_word) ui) * ((mp_word) * tmpx++);
for (iy = 0; iy < n->used; iy++) {
*_W++ += ((mp_word) mu) * ((mp_word) * tmpn++);
}
}
@ -102,44 +103,44 @@ fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
{
register mp_digit *tmpa;
register mp_digit *tmpx;
register mp_word *_W, *_W1;
/* nox fix rest of carries */
_W1 = W + ix;
_W = W + ++ix;
for (; ix <= m->used * 2 + 1; ix++) {
for (; ix <= n->used * 2 + 1; ix++) {
*_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
}
/* copy out, A = A/b^n
/* copy out, A = A/b**n
*
* The result is A/b^n but instead of converting from an array of mp_word
* to mp_digit than calling mp_rshd we just copy them in the right
* order
* The result is A/b**n but instead of converting from an
* array of mp_word to mp_digit than calling mp_rshd
* we just copy them in the right order
*/
tmpa = a->dp;
_W = W + m->used;
tmpx = x->dp;
_W = W + n->used;
for (ix = 0; ix < m->used + 1; ix++) {
*tmpa++ = *_W++ & ((mp_word) MP_MASK);
for (ix = 0; ix < n->used + 1; ix++) {
*tmpx++ = *_W++ & ((mp_word) MP_MASK);
}
/* zero oldused digits, if the input a was larger than
* m->used+1 we'll have to clear the digits */
for (; ix < olduse; ix++) {
*tmpa++ = 0;
*tmpx++ = 0;
}
}
/* set the max used and clamp */
a->used = m->used + 1;
mp_clamp (a);
x->used = n->used + 1;
mp_clamp (x);
/* if A >= m then A = A - m */
if (mp_cmp_mag (a, m) != MP_LT) {
return s_mp_sub (a, m, a);
if (mp_cmp_mag (x, n) != MP_LT) {
return s_mp_sub (x, n, x);
}
return MP_OKAY;
}

View File

@ -16,15 +16,17 @@
/* fast squaring
*
* This is the comba method where the columns of the product are computed first
* then the carries are computed. This has the effect of making a very simple
* inner loop that is executed the most
* This is the comba method where the columns of the product
* are computed first then the carries are computed. This
* has the effect of making a very simple inner loop that
* is executed the most
*
* W2 represents the outer products and W the inner.
*
* A further optimizations is made because the inner products are of the form
* "A * B * 2". The *2 part does not need to be computed until the end which is
* good because 64-bit shifts are slow!
* A further optimizations is made because the inner
* products are of the form "A * B * 2". The *2 part does
* not need to be computed until the end which is good
* because 64-bit shifts are slow!
*
* Based on Algorithm 14.16 on pp.597 of HAC.
*
@ -48,26 +50,15 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
* Note that there are two buffers. Since squaring requires
* a outter and inner product and the inner product requires
* computing a product and doubling it (a relatively expensive
* op to perform n^2 times if you don't have to) the inner and
* op to perform n**2 times if you don't have to) the inner and
* outer products are computed in different buffers. This way
* the inner product can be doubled using n doublings instead of
* n^2
* n**2
*/
memset (W, 0, newused * sizeof (mp_word));
memset (W2, 0, newused * sizeof (mp_word));
/* note optimization
* values in W2 are only written in even locations which means
* we can collapse the array to 256 words [and fixup the memset above]
* provided we also fix up the summations below. Ideally
* the fixup loop should be unrolled twice to handle the even/odd
* cases, and then a final step to handle odd cases [e.g. newused == odd]
*
* This will not only save ~8*256 = 2KB of stack but lower the number of
* operations required to finally fix up the columns
*/
/* This computes the inner product. To simplify the inner N^2 loop
/* This computes the inner product. To simplify the inner N**2 loop
* the multiplication by two is done afterwards in the N loop.
*/
for (ix = 0; ix < pa; ix++) {
@ -101,18 +92,19 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
}
/* setup dest */
olduse = b->used;
olduse = b->used;
b->used = newused;
/* double first value, since the inner products are half of what they should be */
W[0] += W[0] + W2[0];
/* now compute digits */
{
register mp_digit *tmpb;
tmpb = b->dp;
/* double first value, since the inner products are
* half of what they should be
*/
W[0] += W[0] + W2[0];
tmpb = b->dp;
for (ix = 1; ix < newused; ix++) {
/* double/add next digit */
W[ix] += W[ix] + W2[ix];
@ -120,9 +112,13 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
W[ix] = W[ix] + (W[ix - 1] >> ((mp_word) DIGIT_BIT));
*tmpb++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
}
/* set the last value. Note even if the carry is zero
* this is required since the next step will not zero
* it if b originally had a value at b->dp[2*a.used]
*/
*tmpb++ = (mp_digit) (W[(newused) - 1] & ((mp_word) MP_MASK));
/* clear high */
/* clear high digits */
for (; ix < olduse; ix++) {
*tmpb++ = 0;
}

View File

@ -14,7 +14,7 @@
*/
#include <tommath.h>
/* computes a = 2^b
/* computes a = 2**b
*
* Simple algorithm which zeroes the int, grows it then just sets one bit
* as required.

View File

@ -21,7 +21,7 @@ mp_copy (mp_int * a, mp_int * b)
int res, n;
/* if dst == src do nothing */
if (a == b || a->dp == b->dp) {
if (a == b) {
return MP_OKAY;
}

View File

@ -21,11 +21,15 @@ mp_count_bits (mp_int * a)
int r;
mp_digit q;
/* shortcut */
if (a->used == 0) {
return 0;
}
/* get number of digits and add that */
r = (a->used - 1) * DIGIT_BIT;
/* take the last digit and count the bits in it */
q = a->dp[a->used - 1];
while (q > ((mp_digit) 0)) {
++r;

View File

@ -14,7 +14,7 @@
*/
#include <tommath.h>
/* shift right by a certain bit count (store quotient in c, remainder in d) */
/* shift right by a certain bit count (store quotient in c, optional remainder in d) */
int
mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
{
@ -81,7 +81,6 @@ mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
}
}
mp_clamp (c);
res = MP_OKAY;
if (d != NULL) {
mp_exch (&t, d);
}

64
bn_mp_div_3.c Normal file
View File

@ -0,0 +1,64 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* divide by three (based on routine from MPI and the GMP manual) */
int
mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
{
mp_int q;
mp_word w, t;
mp_digit b;
int res, ix;
/* b = 2**DIGIT_BIT / 3 */
b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
return res;
}
q.used = a->used;
q.sign = a->sign;
w = 0;
for (ix = a->used - 1; ix >= 0; ix--) {
w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
if (w >= 3) {
t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
w -= (t << ((mp_word)1)) + t;
while (w >= 3) {
t += 1;
w -= 3;
}
} else {
t = 0;
}
q.dp[ix] = t;
}
if (d != NULL) {
*d = w;
}
if (c != NULL) {
mp_clamp(&q);
mp_exch(&q, c);
}
mp_clear(&q);
return res;
}

View File

@ -14,31 +14,51 @@
*/
#include <tommath.h>
/* single digit division */
/* single digit division (based on routine from MPI) */
int
mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
{
mp_int t, t2;
int res;
if ((res = mp_init (&t)) != MP_OKAY) {
return res;
mp_int q;
mp_word w, t;
int res, ix;
if (b == 0) {
return MP_VAL;
}
if ((res = mp_init (&t2)) != MP_OKAY) {
mp_clear (&t);
return res;
if (b == 3) {
return mp_div_3(a, c, d);
}
mp_set (&t, b);
res = mp_div (a, &t, c, &t2);
/* set remainder if not null */
if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
return res;
}
q.used = a->used;
q.sign = a->sign;
w = 0;
for (ix = a->used - 1; ix >= 0; ix--) {
w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
if (w >= b) {
t = w / b;
w = w % b;
} else {
t = 0;
}
q.dp[ix] = t;
}
if (d != NULL) {
*d = t2.dp[0];
*d = w;
}
mp_clear (&t);
mp_clear (&t2);
if (c != NULL) {
mp_clamp(&q);
mp_exch(&q, c);
}
mp_clear(&q);
return res;
}

View File

@ -14,7 +14,7 @@
*/
#include <tommath.h>
/* reduce "a" in place modulo "b" using the Diminished Radix algorithm.
/* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
*
* Based on algorithm from the paper
*
@ -23,107 +23,64 @@
* POSTECH Information Research Laboratories
*
* The modulus must be of a special format [see manual]
*
* Has been modified to use algorithm 7.10 from the LTM book instead
*/
int
mp_dr_reduce (mp_int * a, mp_int * b, mp_digit mp)
mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
{
int err, i, j, k;
mp_word r;
mp_digit mu, *tmpj, *tmpi;
/* k = digits in modulus */
k = b->used;
/* ensure that "a" has at least 2k digits */
if (a->alloc < k + k) {
if ((err = mp_grow (a, k + k)) != MP_OKAY) {
int err, i, m;
mp_word r;
mp_digit mu, *tmpx1, *tmpx2;
/* m = digits in modulus */
m = n->used;
/* ensure that "x" has at least 2m digits */
if (x->alloc < m + m) {
if ((err = mp_grow (x, m + m)) != MP_OKAY) {
return err;
}
}
/* alias for a->dp[i] */
tmpi = a->dp + k + k - 1;
/* for (i = 2k - 1; i >= k; i = i - 1)
*
* This is the main loop of the reduction. Note that at the end
* the words above position k are not zeroed as expected. The end
* result is that the digits from 0 to k-1 are the residue. So
* we have to clear those afterwards.
*/
for (i = k + k - 1; i >= k; i = i - 1) {
/* x[i - 1 : i - k] += x[i]*mp */
/* x[i] * mp */
r = ((mp_word) *tmpi--) * ((mp_word) mp);
/* now add r to x[i-1:i-k]
*
* First add it to the first digit x[i-k] then form the carry
* then enter the main loop
*/
j = i - k;
/* alias for a->dp[j] */
tmpj = a->dp + j;
/* add digit */
*tmpj += (mp_digit)(r & MP_MASK);
/* this is the carry */
mu = (r >> ((mp_word) DIGIT_BIT)) + (*tmpj >> DIGIT_BIT);
/* clear carry from a->dp[j] */
*tmpj++ &= MP_MASK;
/* now add rest of the digits
*
* Note this is basically a simple single digit addition to
* a larger multiple digit number. This is optimized somewhat
* because the propagation of carries is not likely to move
* more than a few digits.
*
*/
for (++j; mu != 0 && j <= (i - 1); ++j) {
*tmpj += mu;
mu = *tmpj >> DIGIT_BIT;
*tmpj++ &= MP_MASK;
}
/* if final carry */
if (mu != 0) {
/* add mp to this to correct */
j = i - k;
tmpj = a->dp + j;
*tmpj += mp;
mu = *tmpj >> DIGIT_BIT;
*tmpj++ &= MP_MASK;
/* now handle carries */
for (++j; mu != 0 && j <= (i - 1); j++) {
*tmpj += mu;
mu = *tmpj >> DIGIT_BIT;
*tmpj++ &= MP_MASK;
}
}
/* top of loop, this is where the code resumes if
* another reduction pass is required.
*/
top:
/* aliases for digits */
/* alias for lower half of x */
tmpx1 = x->dp;
/* alias for upper half of x, or x/B**m */
tmpx2 = x->dp + m;
/* set carry to zero */
mu = 0;
/* compute (x mod B**m) + mp * [x/B**m] inline and inplace */
for (i = 0; i < m; i++) {
r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
*tmpx1++ = r & MP_MASK;
mu = r >> ((mp_word)DIGIT_BIT);
}
/* zero words above k */
tmpi = a->dp + k;
for (i = k; i < a->used; i++) {
*tmpi++ = 0;
/* set final carry */
*tmpx1++ = mu;
/* zero words above m */
for (i = m + 1; i < x->used; i++) {
*tmpx1++ = 0;
}
/* clamp, sub and return */
mp_clamp (a);
mp_clamp (x);
/* if a >= b [b == modulus] then subtract the modulus to fix up */
if (mp_cmp_mag (a, b) != MP_LT) {
return s_mp_sub (a, b, a);
/* if x >= n then subtract and reduce again
* Each successive "recursion" makes the input smaller and smaller.
*/
if (mp_cmp_mag (x, n) != MP_LT) {
s_mp_sub(x, n, x);
goto top;
}
return MP_OKAY;
}

View File

@ -20,6 +20,7 @@ void mp_dr_setup(mp_int *a, mp_digit *d)
/* the casts are required if DIGIT_BIT is one less than
* the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
*/
*d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - ((mp_word)a->dp[0]));
*d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
((mp_word)a->dp[0]));
}

View File

@ -14,7 +14,7 @@
*/
#include <tommath.h>
/* calculate c = a^b using a square-multiply algorithm */
/* calculate c = a**b using a square-multiply algorithm */
int
mp_expt_d (mp_int * a, mp_digit b, mp_int * c)
{

View File

@ -14,7 +14,6 @@
*/
#include <tommath.h>
static int f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y);
/* this is a shell function that calls either the normal or Montgomery
* exptmod functions. Originally the call to the montgomery code was
@ -55,212 +54,22 @@ mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
return err;
}
/* and now compute (1/G)^|X| instead of G^X [X < 0] */
/* and now compute (1/G)**|X| instead of G**X [X < 0] */
err = mp_exptmod(&tmpG, &tmpX, P, Y);
mp_clear_multi(&tmpG, &tmpX, NULL);
return err;
}
dr = mp_dr_is_modulus(P);
if (dr == 0) {
dr = mp_reduce_is_2k(P) << 1;
}
/* if the modulus is odd use the fast method */
if ((mp_isodd (P) == 1 || dr == 1) && P->used > 4) {
if ((mp_isodd (P) == 1 || dr != 0) && P->used > 4) {
return mp_exptmod_fast (G, X, P, Y, dr);
} else {
return f_mp_exptmod (G, X, P, Y);
return s_mp_exptmod (G, X, P, Y);
}
}
static int
f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
{
mp_int M[256], res, mu;
mp_digit buf;
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
/* find window size */
x = mp_count_bits (X);
if (x <= 7) {
winsize = 2;
} else if (x <= 36) {
winsize = 3;
} else if (x <= 140) {
winsize = 4;
} else if (x <= 450) {
winsize = 5;
} else if (x <= 1303) {
winsize = 6;
} else if (x <= 3529) {
winsize = 7;
} else {
winsize = 8;
}
#ifdef MP_LOW_MEM
if (winsize > 5) {
winsize = 5;
}
#endif
/* init G array */
for (x = 0; x < (1 << winsize); x++) {
if ((err = mp_init_size (&M[x], 1)) != MP_OKAY) {
for (y = 0; y < x; y++) {
mp_clear (&M[y]);
}
return err;
}
}
/* create mu, used for Barrett reduction */
if ((err = mp_init (&mu)) != MP_OKAY) {
goto __M;
}
if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
goto __MU;
}
/* create M table
*
* The M table contains powers of the input base, e.g. M[x] = G^x mod P
*
* The first half of the table is not computed though accept for M[0] and M[1]
*/
if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
goto __MU;
}
/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
goto __MU;
}
for (x = 0; x < (winsize - 1); x++) {
if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
goto __MU;
}
if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
goto __MU;
}
}
/* create upper table */
for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
goto __MU;
}
if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
goto __MU;
}
}
/* setup result */
if ((err = mp_init (&res)) != MP_OKAY) {
goto __MU;
}
mp_set (&res, 1);
/* set initial mode and bit cnt */
mode = 0;
bitcnt = 1;
buf = 0;
digidx = X->used - 1;
bitcpy = bitbuf = 0;
for (;;) {
/* grab next digit as required */
if (--bitcnt == 0) {
if (digidx == -1) {
break;
}
buf = X->dp[digidx--];
bitcnt = (int) DIGIT_BIT;
}
/* grab the next msb from the exponent */
y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
buf <<= (mp_digit)1;
/* if the bit is zero and mode == 0 then we ignore it
* These represent the leading zero bits before the first 1 bit
* in the exponent. Technically this opt is not required but it
* does lower the # of trivial squaring/reductions used
*/
if (mode == 0 && y == 0)
continue;
/* if the bit is zero and mode == 1 then we square */
if (mode == 1 && y == 0) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
continue;
}
/* else we add it to the window */
bitbuf |= (y << (winsize - ++bitcpy));
mode = 2;
if (bitcpy == winsize) {
/* ok window is filled so square as required and multiply */
/* square first */
for (x = 0; x < winsize; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
}
/* then multiply */
if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
goto __MU;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __MU;
}
/* empty window and reset */
bitcpy = bitbuf = 0;
mode = 1;
}
}
/* if bits remain then square/multiply */
if (mode == 2 && bitcpy > 0) {
/* square then multiply if the bit is set */
for (x = 0; x < bitcpy; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
bitbuf <<= 1;
if ((bitbuf & (1 << winsize)) != 0) {
/* then multiply */
if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
}
}
}
mp_exch (&res, Y);
err = MP_OKAY;
__RES:mp_clear (&res);
__MU:mp_clear (&mu);
__M:
for (x = 0; x < (1 << winsize); x++) {
mp_clear (&M[x]);
}
return err;
}

View File

@ -27,6 +27,11 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
mp_int M[256], res;
mp_digit buf, mp;
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
/* use a pointer to the reduction algorithm. This allows us to use
* one of many reduction algorithms without modding the guts of
* the code with if statements everywhere.
*/
int (*redux)(mp_int*,mp_int*,mp_digit);
/* find window size */
@ -64,6 +69,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
}
}
/* determine and setup reduction code */
if (redmode == 0) {
/* now setup montgomery */
if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
@ -71,17 +77,23 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
}
/* automatically pick the comba one if available (saves quite a few calls/ifs) */
if ( ((P->used * 2 + 1) < MP_WARRAY) &&
if (((P->used * 2 + 1) < MP_WARRAY) &&
P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
redux = fast_mp_montgomery_reduce;
} else {
/* use slower baselien method */
redux = mp_montgomery_reduce;
}
} else {
} else if (redmode == 1) {
/* setup DR reduction */
mp_dr_setup(P, &mp);
redux = mp_dr_reduce;
} else {
/* setup 2k reduction */
if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
goto __M;
}
redux = mp_reduce_2k;
}
/* setup result */
@ -142,7 +154,8 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
bitcnt = 1;
buf = 0;
digidx = X->used - 1;
bitcpy = bitbuf = 0;
bitcpy = 0;
bitbuf = 0;
for (;;) {
/* grab next digit as required */
@ -203,7 +216,8 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
}
/* empty window and reset */
bitcpy = bitbuf = 0;
bitcpy = 0;
bitbuf = 0;
mode = 1;
}
}
@ -233,7 +247,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
}
if (redmode == 0) {
/* fixup result */
/* fixup result if Montgomery reduction is used */
if ((err = mp_montgomery_reduce (&res, P, mp)) != MP_OKAY) {
goto __RES;
}

View File

@ -24,7 +24,7 @@ mp_init (mp_int * a)
return MP_MEM;
}
/* set the used to zero, allocated digit to the default precision
/* set the used to zero, allocated digits to the default precision
* and sign to positive */
a->used = 0;
a->alloc = MP_PREC;

View File

@ -14,24 +14,34 @@
*/
#include <tommath.h>
/* c = |a| * |b| using Karatsuba Multiplication using three half size multiplications
/* c = |a| * |b| using Karatsuba Multiplication using
* three half size multiplications
*
* Let B represent the radix [e.g. 2**DIGIT_BIT] and let n represent half of the number of digits in the min(a,b)
* Let B represent the radix [e.g. 2**DIGIT_BIT] and
* let n represent half of the number of digits in
* the min(a,b)
*
* a = a1 * B^n + a0
* b = b1 * B^n + b0
* a = a1 * B**n + a0
* b = b1 * B**n + b0
*
* Then, a * b => a1b1 * B^2n + ((a1 - b1)(a0 - b0) + a0b0 + a1b1) * B + a0b0
* Then, a * b =>
a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0
*
* Note that a1b1 and a0b0 are used twice and only need to be computed once. So in total
* three half size (half # of digit) multiplications are performed, a0b0, a1b1 and (a1-b1)(a0-b0)
* Note that a1b1 and a0b0 are used twice and only need to be
* computed once. So in total three half size (half # of
* digit) multiplications are performed, a0b0, a1b1 and
* (a1-b1)(a0-b0)
*
* Note that a multiplication of half the digits requires 1/4th the number of single precision
* multiplications so in total after one call 25% of the single precision multiplications are saved.
* Note also that the call to mp_mul can end up back in this function if the a0, a1, b0, or b1 are above
* the threshold. This is known as divide-and-conquer and leads to the famous O(N^lg(3)) or O(N^1.584) work which
* is asymptopically lower than the standard O(N^2) that the baseline/comba methods use. Generally though the
* overhead of this method doesn't pay off until a certain size (N ~ 80) is reached.
* Note that a multiplication of half the digits requires
* 1/4th the number of single precision multiplications so in
* total after one call 25% of the single precision multiplications
* are saved. Note also that the call to mp_mul can end up back
* in this function if the a0, a1, b0, or b1 are above the threshold.
* This is known as divide-and-conquer and leads to the famous
* O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
* the standard O(N**2) that the baseline/comba methods use.
* Generally though the overhead of this method doesn't pay off
* until a certain size (N ~ 80) is reached.
*/
int
mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
@ -101,14 +111,15 @@ mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
}
}
/* only need to clamp the lower words since by definition the upper words x1/y1 must
* have a known number of digits
/* only need to clamp the lower words since by definition the
* upper words x1/y1 must have a known number of digits
*/
mp_clamp (&x0);
mp_clamp (&y0);
/* now calc the products x0y0 and x1y1 */
if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) /* after this x0 is no longer required, free temp [x0==t2]! */
/* after this x0 is no longer required, free temp [x0==t2]! */
if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
goto X1Y1; /* x0y0 = x0*y0 */
if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
goto X1Y1; /* x1y1 = x1*y1 */

View File

@ -14,10 +14,12 @@
*/
#include <tommath.h>
/* Karatsuba squaring, computes b = a*a using three half size squarings
/* Karatsuba squaring, computes b = a*a using three
* half size squarings
*
* See comments of mp_karatsuba_mul for details. It is essentially the same algorithm
* but merely tuned to perform recursive squarings.
* See comments of mp_karatsuba_mul for details. It
* is essentially the same algorithm but merely
* tuned to perform recursive squarings.
*/
int
mp_karatsuba_sqr (mp_int * a, mp_int * b)
@ -74,32 +76,32 @@ mp_karatsuba_sqr (mp_int * a, mp_int * b)
/* now calc the products x0*x0 and x1*x1 */
if (mp_sqr (&x0, &x0x0) != MP_OKAY)
goto X1X1; /* x0x0 = x0*x0 */
goto X1X1; /* x0x0 = x0*x0 */
if (mp_sqr (&x1, &x1x1) != MP_OKAY)
goto X1X1; /* x1x1 = x1*x1 */
goto X1X1; /* x1x1 = x1*x1 */
/* now calc (x1-x0)^2 */
/* now calc (x1-x0)**2 */
if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
goto X1X1; /* t1 = x1 - x0 */
goto X1X1; /* t1 = x1 - x0 */
if (mp_sqr (&t1, &t1) != MP_OKAY)
goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
/* add x0y0 */
if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
goto X1X1; /* t2 = x0y0 + x1y1 */
goto X1X1; /* t2 = x0x0 + x1x1 */
if (mp_sub (&t2, &t1, &t1) != MP_OKAY)
goto X1X1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */
/* shift by B */
if (mp_lshd (&t1, B) != MP_OKAY)
goto X1X1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
goto X1X1; /* x1y1 = x1y1 << 2*B */
goto X1X1; /* x1x1 = x1x1 << 2*B */
if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
goto X1X1; /* t1 = x0y0 + t1 */
goto X1X1; /* t1 = x0x0 + t1 */
if (mp_add (&t1, &x1x1, b) != MP_OKAY)
goto X1X1; /* t1 = x0y0 + t1 + x1y1 */
goto X1X1; /* t1 = x0x0 + t1 + x1x1 */
err = MP_OKAY;

View File

@ -33,29 +33,29 @@ mp_lshd (mp_int * a, int b)
}
{
register mp_digit *tmpa, *tmpaa;
register mp_digit *top, *bottom;
/* increment the used by the shift amount than copy upwards */
/* increment the used by the shift amount then copy upwards */
a->used += b;
/* top */
tmpa = a->dp + a->used - 1;
top = a->dp + a->used - 1;
/* base */
tmpaa = a->dp + a->used - 1 - b;
bottom = a->dp + a->used - 1 - b;
/* much like mp_rshd this is implemented using a sliding window
* except the window goes the otherway around. Copying from
* the bottom to the top. see bn_mp_rshd.c for more info.
*/
for (x = a->used - 1; x >= b; x--) {
*tmpa-- = *tmpaa--;
*top-- = *bottom--;
}
/* zero the lower digits */
tmpa = a->dp;
top = a->dp;
for (x = 0; x < b; x++) {
*tmpa++ = 0;
*top++ = 0;
}
}
return MP_OKAY;

View File

@ -17,31 +17,5 @@
int
mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
{
mp_int t, t2;
int res;
if ((res = mp_init (&t)) != MP_OKAY) {
return res;
}
if ((res = mp_init (&t2)) != MP_OKAY) {
mp_clear (&t);
return res;
}
mp_set (&t, b);
mp_div (a, &t, NULL, &t2);
if (t2.sign == MP_NEG) {
if ((res = mp_add_d (&t2, b, &t2)) != MP_OKAY) {
mp_clear (&t);
mp_clear (&t2);
return res;
}
}
*c = t2.dp[0];
mp_clear (&t);
mp_clear (&t2);
return MP_OKAY;
return mp_div_d(a, b, NULL, c);
}

View File

@ -14,12 +14,12 @@
*/
#include <tommath.h>
/* computes xR^-1 == x (mod N) via Montgomery Reduction */
/* computes xR**-1 == x (mod N) via Montgomery Reduction */
int
mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
{
int ix, res, digs;
mp_digit ui;
mp_digit mu;
/* can the fast reduction [comba] method be used?
*
@ -27,55 +27,60 @@ mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
* than the available columns [255 per default] since carries
* are fixed up in the inner loop.
*/
digs = m->used * 2 + 1;
if ((digs < MP_WARRAY)
&& m->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
return fast_mp_montgomery_reduce (a, m, mp);
digs = n->used * 2 + 1;
if ((digs < MP_WARRAY) &&
n->used <
(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
return fast_mp_montgomery_reduce (x, n, rho);
}
/* grow the input as required */
if (a->alloc < m->used * 2 + 1) {
if ((res = mp_grow (a, m->used * 2 + 1)) != MP_OKAY) {
if (x->alloc < digs) {
if ((res = mp_grow (x, digs)) != MP_OKAY) {
return res;
}
}
a->used = m->used * 2 + 1;
x->used = digs;
for (ix = 0; ix < m->used; ix++) {
/* ui = ai * m' mod b */
ui = (a->dp[ix] * mp) & MP_MASK;
for (ix = 0; ix < n->used; ix++) {
/* mu = ai * m' mod b */
mu = (x->dp[ix] * rho) & MP_MASK;
/* a = a + ui * m * b^i */
/* a = a + mu * m * b**i */
{
register int iy;
register mp_digit *tmpx, *tmpy, mu;
register mp_digit *tmpn, *tmpx, u;
register mp_word r;
/* aliases */
tmpx = m->dp;
tmpy = a->dp + ix;
tmpn = n->dp;
tmpx = x->dp + ix;
mu = 0;
for (iy = 0; iy < m->used; iy++) {
r = ((mp_word) ui) * ((mp_word) * tmpx++) + ((mp_word) mu) + ((mp_word) * tmpy);
mu = (r >> ((mp_word) DIGIT_BIT));
*tmpy++ = (r & ((mp_word) MP_MASK));
/* set the carry to zero */
u = 0;
/* Multiply and add in place */
for (iy = 0; iy < n->used; iy++) {
r = ((mp_word) mu) * ((mp_word) * tmpn++) +
((mp_word) u) + ((mp_word) * tmpx);
u = (r >> ((mp_word) DIGIT_BIT));
*tmpx++ = (r & ((mp_word) MP_MASK));
}
/* propagate carries */
while (mu) {
*tmpy += mu;
mu = (*tmpy >> DIGIT_BIT) & 1;
*tmpy++ &= MP_MASK;
while (u) {
*tmpx += u;
u = *tmpx >> DIGIT_BIT;
*tmpx++ &= MP_MASK;
}
}
}
/* A = A/b^n */
mp_rshd (a, m->used);
/* x = x/b**n.used */
mp_rshd (x, n->used);
/* if A >= m then A = A - m */
if (mp_cmp_mag (a, m) != MP_LT) {
return s_mp_sub (a, m, a);
if (mp_cmp_mag (x, n) != MP_LT) {
return s_mp_sub (x, n, x);
}
return MP_OKAY;

View File

@ -16,38 +16,38 @@
/* setups the montgomery reduction stuff */
int
mp_montgomery_setup (mp_int * a, mp_digit * mp)
mp_montgomery_setup (mp_int * n, mp_digit * rho)
{
mp_digit x, b;
/* fast inversion mod 2^k
/* fast inversion mod 2**k
*
* Based on the fact that
*
* XA = 1 (mod 2^n) => (X(2-XA)) A = 1 (mod 2^2n)
* => 2*X*A - X*X*A*A = 1
* => 2*(1) - (1) = 1
* XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
* => 2*X*A - X*X*A*A = 1
* => 2*(1) - (1) = 1
*/
b = a->dp[0];
b = n->dp[0];
if ((b & 1) == 0) {
return MP_VAL;
}
x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2^4 */
x *= 2 - b * x; /* here x*a==1 mod 2^8 */
x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
x *= 2 - b * x; /* here x*a==1 mod 2**8 */
#if !defined(MP_8BIT)
x *= 2 - b * x; /* here x*a==1 mod 2^16; each step doubles the nb of bits */
x *= 2 - b * x; /* here x*a==1 mod 2**16 */
#endif
#if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
x *= 2 - b * x; /* here x*a==1 mod 2^32 */
x *= 2 - b * x; /* here x*a==1 mod 2**32 */
#endif
#ifdef MP_64BIT
x *= 2 - b * x; /* here x*a==1 mod 2^64 */
x *= 2 - b * x; /* here x*a==1 mod 2**64 */
#endif
/* t = -1/m mod b */
*mp = (((mp_digit) 1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK;
/* rho = -1/m mod b */
*rho = (((mp_digit) 1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK;
return MP_OKAY;
}

View File

@ -20,19 +20,24 @@ mp_mul (mp_int * a, mp_int * b, mp_int * c)
{
int res, neg;
neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
if (MIN (a->used, b->used) > KARATSUBA_MUL_CUTOFF) {
if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
res = mp_toom_mul(a, b, c);
} else if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
res = mp_karatsuba_mul (a, b, c);
} else {
/* can we use the fast multiplier?
*
* The fast multiplier can be used if the output will have less than
* MP_WARRAY digits and the number of digits won't affect carry propagation
* The fast multiplier can be used if the output will
* have less than MP_WARRAY digits and the number of
* digits won't affect carry propagation
*/
int digs = a->used + b->used + 1;
if ((digs < MP_WARRAY)
&& MIN(a->used, b->used) <= (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
if ((digs < MP_WARRAY) &&
MIN(a->used, b->used) <=
(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
res = fast_s_mp_mul_digs (a, b, c, digs);
} else {
res = s_mp_mul (a, b, c);

View File

@ -14,22 +14,8 @@
*/
#include <tommath.h>
/* pre-calculate the value required for Barrett reduction
* For a given modulus "b" it calulates the value required in "a"
*/
int
mp_reduce_setup (mp_int * a, mp_int * b)
{
int res;
if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
return res;
}
res = mp_div (a, b, a, NULL);
return res;
}
/* reduces x mod m, assumes 0 < x < m^2, mu is precomputed via mp_reduce_setup
/* reduces x mod m, assumes 0 < x < m**2, mu is
* precomputed via mp_reduce_setup.
* From HAC pp.604 Algorithm 14.42
*/
int
@ -38,11 +24,12 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
mp_int q;
int res, um = m->used;
/* q = x */
if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
return res;
}
/* q1 = x / b^(k-1) */
/* q1 = x / b**(k-1) */
mp_rshd (&q, um - 1);
/* according to HAC this is optimization is ok */
@ -56,15 +43,15 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
}
}
/* q3 = q2 / b^(k+1) */
/* q3 = q2 / b**(k+1) */
mp_rshd (&q, um + 1);
/* x = x mod b^(k+1), quick (no division) */
/* x = x mod b**(k+1), quick (no division) */
if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
goto CLEANUP;
}
/* q = q * m mod b^(k+1), quick (no division) */
/* q = q * m mod b**(k+1), quick (no division) */
if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
goto CLEANUP;
}
@ -74,7 +61,7 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
goto CLEANUP;
}
/* If x < 0, add b^(k+1) to it */
/* If x < 0, add b**(k+1) to it */
if (mp_cmp_d (x, 0) == MP_LT) {
mp_set (&q, 1);
if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
@ -89,7 +76,7 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
break;
}
}
CLEANUP:
mp_clear (&q);

56
bn_mp_reduce_2k.c Normal file
View File

@ -0,0 +1,56 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* reduces a modulo n where n is of the form 2**p - k */
int
mp_reduce_2k(mp_int *a, mp_int *n, mp_digit k)
{
mp_int q;
int p, res;
if ((res = mp_init(&q)) != MP_OKAY) {
return res;
}
p = mp_count_bits(n);
top:
/* q = a/2**p, a = a mod 2**p */
if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
goto ERR;
}
if (k != 1) {
/* q = q * k */
if ((res = mp_mul_d(&q, k, &q)) != MP_OKAY) {
goto ERR;
}
}
/* a = a + q */
if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
goto ERR;
}
if (mp_cmp_mag(a, n) != MP_LT) {
s_mp_sub(a, n, a);
goto top;
}
ERR:
mp_clear(&q);
return res;
}

42
bn_mp_reduce_2k_setup.c Normal file
View File

@ -0,0 +1,42 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* determines the setup value */
int
mp_reduce_2k_setup(mp_int *a, mp_digit *d)
{
int res, p;
mp_int tmp;
if ((res = mp_init(&tmp)) != MP_OKAY) {
return res;
}
p = mp_count_bits(a);
if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
mp_clear(&tmp);
return res;
}
if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
mp_clear(&tmp);
return res;
}
*d = tmp.dp[0];
mp_clear(&tmp);
return MP_OKAY;
}

37
bn_mp_reduce_is_2k.c Normal file
View File

@ -0,0 +1,37 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* determines if mp_reduce_2k can be used */
int
mp_reduce_is_2k(mp_int *a)
{
int ix, iy;
if (a->used == 0) {
return 0;
} else if (a->used == 1) {
return 1;
} else if (a->used > 1) {
iy = mp_count_bits(a);
for (ix = DIGIT_BIT; ix < iy; ix++) {
if ((a->dp[ix/DIGIT_BIT] & ((mp_digit)1 << (mp_digit)(ix % DIGIT_BIT))) == 0) {
return 0;
}
}
}
return 1;
}

29
bn_mp_reduce_setup.c Normal file
View File

@ -0,0 +1,29 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* pre-calculate the value required for Barrett reduction
* For a given modulus "b" it calulates the value required in "a"
*/
int
mp_reduce_setup (mp_int * a, mp_int * b)
{
int res;
if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
return res;
}
return mp_div (a, b, a, NULL);
}

View File

@ -32,15 +32,15 @@ mp_rshd (mp_int * a, int b)
}
{
register mp_digit *tmpa, *tmpaa;
register mp_digit *bottom, *top;
/* shift the digits down */
/* base */
tmpa = a->dp;
/* bottom */
bottom = a->dp;
/* offset into digits */
tmpaa = a->dp + b;
/* top [offset into digits] */
top = a->dp + b;
/* this is implemented as a sliding window where
* the window is b-digits long and digits from
@ -53,13 +53,15 @@ mp_rshd (mp_int * a, int b)
\-------------------/ ---->
*/
for (x = 0; x < (a->used - b); x++) {
*tmpa++ = *tmpaa++;
*bottom++ = *top++;
}
/* zero the top digits */
for (; x < a->used; x++) {
*tmpa++ = 0;
*bottom++ = 0;
}
}
mp_clamp (a);
/* remove excess digits */
a->used -= b;
}

View File

@ -35,7 +35,7 @@ mp_set_int (mp_int * a, unsigned int b)
b <<= 4;
/* ensure that digits are not clamped off */
a->used += 32 / DIGIT_BIT + 2;
a->used += 1;
}
mp_clamp (a);
return MP_OKAY;

View File

@ -19,12 +19,16 @@ int
mp_sqr (mp_int * a, mp_int * b)
{
int res;
if (a->used > KARATSUBA_SQR_CUTOFF) {
if (a->used >= TOOM_SQR_CUTOFF) {
res = mp_toom_sqr(a, b);
} else if (a->used >= KARATSUBA_SQR_CUTOFF) {
res = mp_karatsuba_sqr (a, b);
} else {
/* can we use the fast multiplier? */
if ((a->used * 2 + 1) < 512 && a->used < (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
if ((a->used * 2 + 1) < MP_WARRAY &&
a->used <
(1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
res = fast_s_mp_sqr (a, b);
} else {
res = s_mp_sqr (a, b);

268
bn_mp_toom_mul.c Normal file
View File

@ -0,0 +1,268 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* multiplication using Toom-Cook 3-way algorithm */
int
mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
{
mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
int res, B;
/* init temps */
if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &b0, &b1, &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
return res;
}
/* B */
B = MIN(a->used, b->used) / 3;
/* a = a2 * B^2 + a1 * B + a0 */
if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_copy(a, &a1)) != MP_OKAY) {
goto ERR;
}
mp_rshd(&a1, B);
mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
if ((res = mp_copy(a, &a2)) != MP_OKAY) {
goto ERR;
}
mp_rshd(&a2, B*2);
/* b = b2 * B^2 + b1 * B + b0 */
if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_copy(b, &b1)) != MP_OKAY) {
goto ERR;
}
mp_rshd(&b1, B);
mp_mod_2d(&b1, DIGIT_BIT * B, &b1);
if ((res = mp_copy(b, &b2)) != MP_OKAY) {
goto ERR;
}
mp_rshd(&b2, B*2);
/* w0 = a0*b0 */
if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) {
goto ERR;
}
/* w4 = a2 * b2 */
if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) {
goto ERR;
}
/* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) {
goto ERR;
}
/* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) {
goto ERR;
}
/* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) {
goto ERR;
}
/* now solve the matrix
0 0 0 0 1
1 2 4 8 16
1 1 1 1 1
16 8 4 2 1
1 0 0 0 0
using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication
*/
/* r1 - r4 */
if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - r0 */
if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
goto ERR;
}
/* r1/2 */
if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3/2 */
if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
goto ERR;
}
/* r2 - r0 - r4 */
if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
goto ERR;
}
/* r1 - r2 */
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - r2 */
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
goto ERR;
}
/* r1 - 8r0 */
if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - 8r4 */
if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
goto ERR;
}
/* 3r2 - r1 - r3 */
if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
goto ERR;
}
/* r1 - r2 */
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - r2 */
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
goto ERR;
}
/* r1/3 */
if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
goto ERR;
}
/* r3/3 */
if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
goto ERR;
}
/* at this point shift W[n] by B*n */
if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) {
goto ERR;
}
ERR:
mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &b0, &b1, &b2, &tmp1, &tmp2, NULL);
return res;
}

220
bn_mp_toom_sqr.c Normal file
View File

@ -0,0 +1,220 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* squaring using Toom-Cook 3-way algorithm */
int
mp_toom_sqr(mp_int *a, mp_int *b)
{
mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2;
int res, B;
/* init temps */
if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) {
return res;
}
/* B */
B = a->used / 3;
/* a = a2 * B^2 + a1 * B + a0 */
if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_copy(a, &a1)) != MP_OKAY) {
goto ERR;
}
mp_rshd(&a1, B);
mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
if ((res = mp_copy(a, &a2)) != MP_OKAY) {
goto ERR;
}
mp_rshd(&a2, B*2);
/* w0 = a0*a0 */
if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) {
goto ERR;
}
/* w4 = a2 * a2 */
if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) {
goto ERR;
}
/* w1 = (a2 + 2(a1 + 2a0))**2 */
if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) {
goto ERR;
}
/* w3 = (a0 + 2(a1 + 2a2))**2 */
if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) {
goto ERR;
}
/* w2 = (a2 + a1 + a0)**2 */
if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) {
goto ERR;
}
/* now solve the matrix
0 0 0 0 1
1 2 4 8 16
1 1 1 1 1
16 8 4 2 1
1 0 0 0 0
using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication.
*/
/* r1 - r4 */
if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - r0 */
if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
goto ERR;
}
/* r1/2 */
if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3/2 */
if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
goto ERR;
}
/* r2 - r0 - r4 */
if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
goto ERR;
}
/* r1 - r2 */
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - r2 */
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
goto ERR;
}
/* r1 - 8r0 */
if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - 8r4 */
if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
goto ERR;
}
/* 3r2 - r1 - r3 */
if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
goto ERR;
}
/* r1 - r2 */
if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
goto ERR;
}
/* r3 - r2 */
if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
goto ERR;
}
/* r1/3 */
if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
goto ERR;
}
/* r3/3 */
if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
goto ERR;
}
/* at this point shift W[n] by B*n */
if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
goto ERR;
}
if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) {
goto ERR;
}
ERR:
mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL);
return res;
}

View File

@ -45,7 +45,6 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
olduse = c->used;
c->used = max + 1;
/* set the carry to zero */
{
register mp_digit u, *tmpa, *tmpb, *tmpc;
register int i;

211
bn_s_mp_exptmod.c Normal file
View File

@ -0,0 +1,211 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
int
s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
{
mp_int M[256], res, mu;
mp_digit buf;
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
/* find window size */
x = mp_count_bits (X);
if (x <= 7) {
winsize = 2;
} else if (x <= 36) {
winsize = 3;
} else if (x <= 140) {
winsize = 4;
} else if (x <= 450) {
winsize = 5;
} else if (x <= 1303) {
winsize = 6;
} else if (x <= 3529) {
winsize = 7;
} else {
winsize = 8;
}
#ifdef MP_LOW_MEM
if (winsize > 5) {
winsize = 5;
}
#endif
/* init M array */
for (x = 0; x < (1 << winsize); x++) {
if ((err = mp_init_size (&M[x], 1)) != MP_OKAY) {
for (y = 0; y < x; y++) {
mp_clear (&M[y]);
}
return err;
}
}
/* create mu, used for Barrett reduction */
if ((err = mp_init (&mu)) != MP_OKAY) {
goto __M;
}
if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
goto __MU;
}
/* create M table
*
* The M table contains powers of the input base, e.g. M[x] = G**x mod P
*
* The first half of the table is not computed though accept for M[0] and M[1]
*/
if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
goto __MU;
}
/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
goto __MU;
}
for (x = 0; x < (winsize - 1); x++) {
if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
goto __MU;
}
if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
goto __MU;
}
}
/* create upper table */
for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
goto __MU;
}
if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
goto __MU;
}
}
/* setup result */
if ((err = mp_init (&res)) != MP_OKAY) {
goto __MU;
}
mp_set (&res, 1);
/* set initial mode and bit cnt */
mode = 0;
bitcnt = 1;
buf = 0;
digidx = X->used - 1;
bitcpy = 0;
bitbuf = 0;
for (;;) {
/* grab next digit as required */
if (--bitcnt == 0) {
if (digidx == -1) {
break;
}
buf = X->dp[digidx--];
bitcnt = (int) DIGIT_BIT;
}
/* grab the next msb from the exponent */
y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
buf <<= (mp_digit)1;
/* if the bit is zero and mode == 0 then we ignore it
* These represent the leading zero bits before the first 1 bit
* in the exponent. Technically this opt is not required but it
* does lower the # of trivial squaring/reductions used
*/
if (mode == 0 && y == 0)
continue;
/* if the bit is zero and mode == 1 then we square */
if (mode == 1 && y == 0) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
continue;
}
/* else we add it to the window */
bitbuf |= (y << (winsize - ++bitcpy));
mode = 2;
if (bitcpy == winsize) {
/* ok window is filled so square as required and multiply */
/* square first */
for (x = 0; x < winsize; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
}
/* then multiply */
if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
goto __MU;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __MU;
}
/* empty window and reset */
bitcpy = 0;
bitbuf = 0;
mode = 1;
}
}
/* if bits remain then square/multiply */
if (mode == 2 && bitcpy > 0) {
/* square then multiply if the bit is set */
for (x = 0; x < bitcpy; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
bitbuf <<= 1;
if ((bitbuf & (1 << winsize)) != 0) {
/* then multiply */
if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
}
}
}
mp_exch (&res, Y);
err = MP_OKAY;
__RES:mp_clear (&res);
__MU:mp_clear (&mu);
__M:
for (x = 0; x < (1 << winsize); x++) {
mp_clear (&M[x]);
}
return err;
}

View File

@ -20,8 +20,8 @@ s_mp_sqr (mp_int * a, mp_int * b)
{
mp_int t;
int res, ix, iy, pa;
mp_word r, u;
mp_digit tmpx, *tmpt;
mp_word r;
mp_digit u, tmpx, *tmpt;
pa = a->used;
if ((res = mp_init_size (&t, pa + pa + 1)) != MP_OKAY) {
@ -32,7 +32,8 @@ s_mp_sqr (mp_int * a, mp_int * b)
for (ix = 0; ix < pa; ix++) {
/* first calculate the digit at 2*ix */
/* calculate double precision result */
r = ((mp_word) t.dp[ix + ix]) + ((mp_word) a->dp[ix]) * ((mp_word) a->dp[ix]);
r = ((mp_word) t.dp[ix + ix]) +
((mp_word) a->dp[ix]) * ((mp_word) a->dp[ix]);
/* store lower part in result */
t.dp[ix + ix] = (mp_digit) (r & ((mp_word) MP_MASK));
@ -44,7 +45,8 @@ s_mp_sqr (mp_int * a, mp_int * b)
tmpx = a->dp[ix];
/* alias for where to store the results */
tmpt = &(t.dp[ix + ix + 1]);
tmpt = t.dp + (ix + ix + 1);
for (iy = ix + 1; iy < pa; iy++) {
/* first calculate the product */
r = ((mp_word) tmpx) * ((mp_word) a->dp[iy]);
@ -60,13 +62,9 @@ s_mp_sqr (mp_int * a, mp_int * b)
/* get carry */
u = (r >> ((mp_word) DIGIT_BIT));
}
r = ((mp_word) * tmpt) + u;
*tmpt = (mp_digit) (r & ((mp_word) MP_MASK));
u = (r >> ((mp_word) DIGIT_BIT));
/* propagate upwards */
++tmpt;
while (u != ((mp_word) 0)) {
r = ((mp_word) * tmpt) + ((mp_word) 1);
while (u != ((mp_digit) 0)) {
r = ((mp_word) * tmpt) + ((mp_word) u);
*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
u = (r >> ((mp_word) DIGIT_BIT));
}

View File

@ -33,7 +33,6 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
olduse = c->used;
c->used = max;
/* sub digits from lower part */
{
register mp_digit u, *tmpa, *tmpb, *tmpc;
register int i;
@ -52,7 +51,7 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
/* U = carry bit of T[i]
* Note this saves performing an AND operation since
* if a carry does occur it will propagate all the way to the
* MSB. As a result a single shift is required to get the carry
* MSB. As a result a single shift is enough to get the carry
*/
u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
@ -81,3 +80,4 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
mp_clamp (c);
return MP_OKAY;
}

View File

@ -18,11 +18,14 @@
CPU /Compiler /MUL CUTOFF/SQR CUTOFF
-------------------------------------------------------------
Intel P4 /GCC v3.2 / 81/ 110
Intel P4 /GCC v3.2 / 70/ 108
AMD Athlon XP /GCC v3.2 / 109/ 127
*/
/* configured for a AMD XP Thoroughbred core with etc/tune.c */
int KARATSUBA_MUL_CUTOFF = 109, /* Min. number of digits before Karatsuba multiplication is used. */
KARATSUBA_SQR_CUTOFF = 127; /* Min. number of digits before Karatsuba squaring is used. */
KARATSUBA_SQR_CUTOFF = 127, /* Min. number of digits before Karatsuba squaring is used. */
TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */
TOOM_SQR_CUTOFF = 400;

View File

@ -1,3 +1,15 @@
May 29th, 2003
v0.18 -- Fixed a bug in s_mp_sqr which would handle carries properly just not very elegantly.
(e.g. correct result, just bad looking code)
-- Fixed bug in mp_sqr which still had a 512 constant instead of MP_WARRAY
-- Added Toom-Cook multipliers [needs tuning!]
-- Added efficient divide by 3 algorithm mp_div_3
-- Re-wrote mp_div_d to be faster than calling mp_div
-- Added in a donated BCC makefile and a single page LTM poster (ahalhabsi@sbcglobal.net)
-- Added mp_reduce_2k which reduces an input modulo n = 2**p - k for any single digit k
-- Made the exptmod system be aware of the 2k reduction algorithms.
-- Rewrote mp_dr_reduce to be smaller, simpler and easier to understand.
May 17th, 2003
v0.17 -- Benjamin Goldberg submitted optimized mp_add and mp_sub routines. A new gen.pl as well
as several smaller suggestions. Thanks!

View File

@ -53,7 +53,7 @@ int main(void)
#ifdef TIMER
int n;
ulong64 tt;
FILE *log, *logb;
FILE *log, *logb, *logc;
#endif
mp_init(&a);
@ -62,11 +62,54 @@ int main(void)
mp_init(&d);
mp_init(&e);
mp_init(&f);
srand(time(NULL));
/* test mp_reduce_2k */
#if 0
for (cnt = 3; cnt <= 4096; ++cnt) {
mp_digit tmp;
mp_2expt(&a, cnt);
mp_sub_d(&a, 1, &a); /* a = 2**cnt - 1 */
printf("\nTesting %4d bits", cnt);
printf("(%d)", mp_reduce_is_2k(&a));
mp_reduce_2k_setup(&a, &tmp);
printf("(%d)", tmp);
for (ix = 0; ix < 100000; ix++) {
if (!(ix & 1023)) {printf("."); fflush(stdout); }
mp_rand(&b, (cnt/DIGIT_BIT + 1) * 2);
mp_copy(&c, &b);
mp_mod(&c, &a, &c);
mp_reduce_2k(&b, &a, 1);
if (mp_cmp(&c, &b)) {
printf("FAILED\n");
exit(0);
}
}
}
#endif
/* test mp_div_3 */
#if 0
for (cnt = 0; cnt < 1000000; ) {
mp_digit r1, r2;
if (!(++cnt & 127)) printf("%9d\r", cnt);
mp_rand(&a, abs(rand()) % 32 + 1);
mp_div_d(&a, 3, &b, &r1);
mp_div_3(&a, &c, &r2);
if (mp_cmp(&b, &c) || r1 != r2) {
printf("Failure\n");
}
}
#endif
/* test the DR reduction */
#if 0
srand(time(NULL));
for (cnt = 2; cnt < 32; cnt++) {
printf("%d digit modulus\n", cnt);
mp_grow(&a, cnt);
@ -91,6 +134,7 @@ int main(void)
if (mp_cmp(&b, &c) != MP_EQ) {
printf("Failed on trial %lu\n", rr); exit(-1);
}
} while (++rr < 1000000);
printf("Passed DR test for %d digits\n", cnt);
@ -98,6 +142,9 @@ int main(void)
#endif
#ifdef TIMER
/* temp. turn off TOOM */
TOOM_MUL_CUTOFF = TOOM_SQR_CUTOFF = 100000;
printf("CLOCKS_PER_SEC == %lu\n", CLOCKS_PER_SEC);
log = fopen("logs/add.log", "w");
@ -172,9 +219,16 @@ int main(void)
}
fclose(log);
}
{
{
char *primes[] = {
/* 2K moduli mersenne primes */
"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151",
"531137992816767098689588206552468627329593117727031923199444138200403559860852242739162502265229285668889329486246501015346579337652707239409519978766587351943831270835393219031728127",
"10407932194664399081925240327364085538615262247266704805319112350403608059673360298012239441732324184842421613954281007791383566248323464908139906605677320762924129509389220345773183349661583550472959420547689811211693677147548478866962501384438260291732348885311160828538416585028255604666224831890918801847068222203140521026698435488732958028878050869736186900714720710555703168729087",
"1475979915214180235084898622737381736312066145333169775147771216478570297878078949377407337049389289382748507531496480477281264838760259191814463365330269540496961201113430156902396093989090226259326935025281409614983499388222831448598601834318536230923772641390209490231836446899608210795482963763094236630945410832793769905399982457186322944729636418890623372171723742105636440368218459649632948538696905872650486914434637457507280441823676813517852099348660847172579408422316678097670224011990280170474894487426924742108823536808485072502240519452587542875349976558572670229633962575212637477897785501552646522609988869914013540483809865681250419497686697771007",
"259117086013202627776246767922441530941818887553125427303974923161874019266586362086201209516800483406550695241733194177441689509238807017410377709597512042313066624082916353517952311186154862265604547691127595848775610568757931191017711408826252153849035830401185072116424747461823031471398340229288074545677907941037288235820705892351068433882986888616658650280927692080339605869308790500409503709875902119018371991620994002568935113136548829739112656797303241986517250116412703509705427773477972349821676443446668383119322540099648994051790241624056519054483690809616061625743042361721863339415852426431208737266591962061753535748892894599629195183082621860853400937932839420261866586142503251450773096274235376822938649407127700846077124211823080804139298087057504713825264571448379371125032081826126566649084251699453951887789613650248405739378594599444335231188280123660406262468609212150349937584782292237144339628858485938215738821232393687046160677362909315071",
"190797007524439073807468042969529173669356994749940177394741882673528979787005053706368049835514900244303495954950709725762186311224148828811920216904542206960744666169364221195289538436845390250168663932838805192055137154390912666527533007309292687539092257043362517857366624699975402375462954490293259233303137330643531556539739921926201438606439020075174723029056838272505051571967594608350063404495977660656269020823960825567012344189908927956646011998057988548630107637380993519826582389781888135705408653045219655801758081251164080554609057468028203308718724654081055323215860189611391296030471108443146745671967766308925858547271507311563765171008318248647110097614890313562856541784154881743146033909602737947385055355960331855614540900081456378659068370317267696980001187750995491090350108417050917991562167972281070161305972518044872048331306383715094854938415738549894606070722584737978176686422134354526989443028353644037187375385397838259511833166416134323695660367676897722287918773420968982326089026150031515424165462111337527431154890666327374921446276833564519776797633875503548665093914556482031482248883127023777039667707976559857333357013727342079099064400455741830654320379350833236245819348824064783585692924881021978332974949906122664421376034687815350484991",
/* DR moduli */
"14059105607947488696282932836518693308967803494693489478439861164411992439598399594747002144074658928593502845729752797260025831423419686528151609940203368612079",
"101745825697019260773923519755878567461315282017759829107608914364075275235254395622580447400994175578963163918967182013639660669771108475957692810857098847138903161308502419410142185759152435680068435915159402496058513611411688900243039",
@ -196,6 +250,7 @@ int main(void)
};
log = fopen("logs/expt.log", "w");
logb = fopen("logs/expt_dr.log", "w");
logc = fopen("logs/expt_2k.log", "w");
for (n = 0; primes[n]; n++) {
mp_read_radix(&a, primes[n], 10);
mp_zero(&b);
@ -224,11 +279,12 @@ int main(void)
exit(0);
}
printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt, tt);
fprintf((n < 7) ? logb : log, "%d %9llu\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
fprintf((n < 6) ? logc : (n < 13) ? logb : log, "%d %9llu\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
}
}
fclose(log);
fclose(logb);
fclose(logc);
log = fopen("logs/invmod.log", "w");
for (cnt = 4; cnt <= 128; cnt += 4) {
@ -263,6 +319,12 @@ int main(void)
div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n =
sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = 0;
/* force KARA and TOOM to enable despite cutoffs */
KARATSUBA_SQR_CUTOFF = KARATSUBA_MUL_CUTOFF = 110;
TOOM_SQR_CUTOFF = TOOM_MUL_CUTOFF = 150;
for (;;) {
/* randomly clear and re-init one variable, this has the affect of triming the alloc space */

2
etc/2kprime.1 Normal file
View File

@ -0,0 +1,2 @@
256-bits (k = 36113) = 115792089237316195423570985008687907853269984665640564039457584007913129603823
512-bits (k = 38117) = 13407807929942597099574024998205846127479365820592393377723561443721764030073546976801874298166903427690031858186486050853753882811946569946433649006045979

80
etc/2kprime.c Normal file
View File

@ -0,0 +1,80 @@
/* Makes safe primes of a 2k nature */
#include <tommath.h>
#include <time.h>
int sizes[] = {256, 512, 768, 1024, 1536, 2048, 3072, 4096};
int main(void)
{
char buf[2000];
int x, y, t;
mp_int q, p;
FILE *out;
clock_t t1;
mp_digit z;
mp_init_multi(&q, &p, NULL);
out = fopen("2kprime.1", "w");
for (x = 0; x < (int)(sizeof(sizes) / sizeof(sizes[0])); x++) {
top:
mp_2expt(&q, sizes[x]);
mp_add_d(&q, 3, &q);
z = -3;
t1 = clock();
for(;;) {
mp_sub_d(&q, 4, &q);
z += 4;
if (z > MP_MASK) {
printf("No primes of size %d found\n", sizes[x]);
break;
}
if (clock() - t1 > CLOCKS_PER_SEC) {
printf("."); fflush(stdout);
// sleep((clock() - t1 + CLOCKS_PER_SEC/2)/CLOCKS_PER_SEC);
t1 = clock();
}
/* quick test on q */
mp_prime_is_prime(&q, 1, &y);
if (y == 0) {
continue;
}
/* find (q-1)/2 */
mp_sub_d(&q, 1, &p);
mp_div_2(&p, &p);
mp_prime_is_prime(&p, 3, &y);
if (y == 0) {
continue;
}
/* test on q */
mp_prime_is_prime(&q, 3, &y);
if (y == 0) {
continue;
}
break;
}
if (y == 0) {
++sizes[x];
goto top;
}
mp_toradix(&q, buf, 10);
printf("\n\n%d-bits (k = %lu) = %s\n", sizes[x], z, buf);
fprintf(out, "%d-bits (k = %lu) = %s\n", sizes[x], z, buf); fflush(out);
}
return 0;
}

View File

@ -32,9 +32,13 @@ mersenne: mersenne.o
drprime: drprime.o
$(CC) drprime.o $(LIBNAME) -o drprime
# fines 2k safe primes for the given config
2kprime: 2kprime.o
$(CC) 2kprime.o $(LIBNAME) -o 2kprime
mont: mont.o
$(CC) mont.o $(LIBNAME) -o mont
clean:
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime

View File

@ -14,4 +14,7 @@ tune: tune.obj
cl tune.obj ../tommath.lib
drprime: drprime.obj
cl drprime.obj ../tommath.lib
cl drprime.obj ../tommath.lib
2kprime: 2kprime.obj
cl 2kprime.obj ../tommath.lib

View File

@ -8,10 +8,9 @@
int
is_mersenne (long s, int *pp)
{
mp_int n, u, mu;
mp_int n, u;
int res, k;
long ss;
*pp = 0;
if ((res = mp_init (&n)) != MP_OKAY) {
@ -22,27 +21,14 @@ is_mersenne (long s, int *pp)
goto __N;
}
if ((res = mp_init (&mu)) != MP_OKAY) {
goto __U;
}
/* n = 2^s - 1 */
mp_set (&n, 1);
ss = s;
while (ss--) {
if ((res = mp_mul_2 (&n, &n)) != MP_OKAY) {
goto __MU;
}
if ((res = mp_2expt(&n, s)) != MP_OKAY) {
goto __MU;
}
if ((res = mp_sub_d (&n, 1, &n)) != MP_OKAY) {
goto __MU;
}
/* setup mu */
if ((res = mp_reduce_setup (&mu, &n)) != MP_OKAY) {
goto __MU;
}
/* set u=4 */
mp_set (&u, 4);
@ -57,26 +43,26 @@ is_mersenne (long s, int *pp)
}
/* make sure u is positive */
if (u.sign == MP_NEG) {
while (u.sign == MP_NEG) {
if ((res = mp_add (&u, &n, &u)) != MP_OKAY) {
goto __MU;
goto __MU;
}
}
/* reduce */
if ((res = mp_reduce (&u, &n, &mu)) != MP_OKAY) {
if ((res = mp_reduce_2k (&u, &n, 1)) != MP_OKAY) {
goto __MU;
}
}
/* if u == 0 then its prime */
if (mp_iszero (&u) == 1) {
*pp = 1;
mp_prime_is_prime(&n, 3, pp);
if (*pp != 1) printf("FAILURE\n");
}
res = MP_OKAY;
__MU:mp_clear (&mu);
__U:mp_clear (&u);
__MU:mp_clear (&u);
__N:mp_clear (&n);
return res;
}

View File

@ -7,10 +7,11 @@ int main(void)
mp_digit mp;
long x, y;
srand(time(NULL));
mp_init_multi(&modulus, &R, &p, &pp, NULL);
/* loop through various sizes */
for (x = 4; x < 128; x++) {
for (x = 4; x < 256; x++) {
printf("DIGITS == %3ld...", x); fflush(stdout);
/* make up the odd modulus */
@ -22,7 +23,7 @@ int main(void)
mp_montgomery_setup(&modulus, &mp);
/* now run through a bunch tests */
for (y = 0; y < 100000; y++) {
for (y = 0; y < 1000; y++) {
mp_rand(&p, x/2); /* p = random */
mp_mul(&p, &R, &pp); /* pp = R * p */
mp_montgomery_reduce(&pp, &modulus, mp);

View File

@ -8,17 +8,17 @@
#ifndef X86_TIMER
/* generic ISO C timer */
unsigned long long __T;
ulong64 __T;
void t_start(void) { __T = clock(); }
unsigned long long t_read(void) { return clock() - __T; }
ulong64 t_read(void) { return clock() - __T; }
#else
extern void t_start(void);
extern unsigned long long t_read(void);
extern ulong64 t_read(void);
#endif
unsigned long long
time_mult (void)
ulong64
time_mult (int max)
{
int x, y;
mp_int a, b, c;
@ -28,7 +28,7 @@ time_mult (void)
mp_init (&c);
t_start();
for (x = 32; x <= 288; x += 4) {
for (x = 32; x <= max; x += 4) {
mp_rand (&a, x);
mp_rand (&b, x);
for (y = 0; y < 100; y++) {
@ -41,8 +41,8 @@ time_mult (void)
return t_read();
}
unsigned long long
time_sqr (void)
ulong64
time_sqr (int max)
{
int x, y;
mp_int a, b;
@ -51,7 +51,7 @@ time_sqr (void)
mp_init (&b);
t_start();
for (x = 32; x <= 288; x += 4) {
for (x = 32; x <= max; x += 4) {
mp_rand (&a, x);
for (y = 0; y < 100; y++) {
mp_sqr (&a, &b);
@ -65,45 +65,85 @@ time_sqr (void)
int
main (void)
{
int best_mult, best_square;
unsigned long long best, ti;
int best_kmult, best_tmult, best_ksquare, best_tsquare;
ulong64 best, ti;
FILE *log;
best_mult = best_square = 0;
best_kmult = best_ksquare = best_tmult = best_tsquare = 0;
/* tune multiplication first */
/* effectively turn TOOM off */
TOOM_SQR_CUTOFF = TOOM_MUL_CUTOFF = 100000;
log = fopen ("mult.log", "w");
best = -1;
for (KARATSUBA_MUL_CUTOFF = 8; KARATSUBA_MUL_CUTOFF <= 200; KARATSUBA_MUL_CUTOFF++) {
ti = time_mult ();
ti = time_mult (300);
printf ("%4d : %9llu\r", KARATSUBA_MUL_CUTOFF, ti);
fprintf (log, "%d, %llu\n", KARATSUBA_MUL_CUTOFF, ti);
fflush (stdout);
if (ti < best) {
printf ("New best: %llu, %d \n", ti, KARATSUBA_MUL_CUTOFF);
best = ti;
best_mult = KARATSUBA_MUL_CUTOFF;
best_kmult = KARATSUBA_MUL_CUTOFF;
}
}
fclose (log);
/* tune squaring */
log = fopen ("sqr.log", "w");
best = -1;
for (KARATSUBA_SQR_CUTOFF = 8; KARATSUBA_SQR_CUTOFF <= 200; KARATSUBA_SQR_CUTOFF++) {
ti = time_sqr ();
ti = time_sqr (300);
printf ("%4d : %9llu\r", KARATSUBA_SQR_CUTOFF, ti);
fprintf (log, "%d, %llu\n", KARATSUBA_SQR_CUTOFF, ti);
fflush (stdout);
if (ti < best) {
printf ("New best: %llu, %d \n", ti, KARATSUBA_SQR_CUTOFF);
best = ti;
best_square = KARATSUBA_SQR_CUTOFF;
best_ksquare = KARATSUBA_SQR_CUTOFF;
}
}
fclose (log);
KARATSUBA_MUL_CUTOFF = best_kmult;
KARATSUBA_SQR_CUTOFF = best_ksquare;
/* tune TOOM mult */
log = fopen ("tmult.log", "w");
best = -1;
for (TOOM_MUL_CUTOFF = best_kmult*5; TOOM_MUL_CUTOFF <= 800; TOOM_MUL_CUTOFF++) {
ti = time_mult (1200);
printf ("%4d : %9llu\r", TOOM_MUL_CUTOFF, ti);
fprintf (log, "%d, %llu\n", TOOM_MUL_CUTOFF, ti);
fflush (stdout);
if (ti < best) {
printf ("New best: %llu, %d \n", ti, TOOM_MUL_CUTOFF);
best = ti;
best_tmult = TOOM_MUL_CUTOFF;
}
}
fclose (log);
/* tune TOOM sqr */
log = fopen ("tsqr.log", "w");
best = -1;
for (TOOM_SQR_CUTOFF = best_ksquare*3; TOOM_SQR_CUTOFF <= 800; TOOM_SQR_CUTOFF++) {
ti = time_sqr (1200);
printf ("%4d : %9llu\r", TOOM_SQR_CUTOFF, ti);
fprintf (log, "%d, %llu\n", TOOM_SQR_CUTOFF, ti);
fflush (stdout);
if (ti < best) {
printf ("New best: %llu, %d \n", ti, TOOM_SQR_CUTOFF);
best = ti;
best_tsquare = TOOM_SQR_CUTOFF;
}
}
fclose (log);
printf
("\n\n\nKaratsuba Multiplier Cutoff: %d\nKaratsuba Squaring Cutoff: %d\n",
best_mult, best_square);
("\n\n\nKaratsuba Multiplier Cutoff: %d\nKaratsuba Squaring Cutoff: %d\nToom Multiplier Cutoff: %d\nToom Squaring Cutoff: %d\n",
best_kmult, best_ksquare, best_tmult, best_tsquare);
return 0;
}

4
gen.pl
View File

@ -6,7 +6,7 @@
use strict;
open( OUT, ">mpi.c" ) or die "Couldn't open mpi.c for writing: $!";
foreach my $filename (glob "bn_*.c") {
foreach my $filename (glob "bn*.c") {
open( SRC, "<$filename" ) or die "Couldn't open $filename for reading: $!";
print OUT "/* Start: $filename */\n";
print OUT qq[#line 0 "$filename"\n];
@ -14,5 +14,5 @@ foreach my $filename (glob "bn_*.c") {
print OUT "\n/* End: $filename */\n\n";
close SRC or die "Error closing $filename after reading: $!";
}
print OUT "\b/* EOF */\n";
print OUT "\n/* EOF */\n";
close OUT or die "Error closing mpi.c after writing: $!";

View File

@ -1,16 +1,16 @@
224 11039864
448 9206336
672 8178200
896 7432176
1120 6433264
1344 5847056
1568 5270184
1792 4943416
2016 4520016
2240 4256168
2464 3999224
2688 3714896
2912 3572720
3136 3340176
3360 3222584
3584 3036336
224 11069160
448 9156136
672 8089755
896 7399424
1120 6389352
1344 5818648
1568 5257112
1792 4982160
2016 4527856
2240 4325312
2464 4051760
2688 3767640
2912 3612520
3136 3415208
3360 3258656
3584 3113360

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 KiB

After

Width:  |  Height:  |  Size: 6.8 KiB

View File

@ -1,7 +1,7 @@
14364 666
21532 253
28700 117
57372 17
71708 9
86044 5
114716 2
513 680
769 257
1025 117
2049 17
2561 9
3073 5
4097 2

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.5 KiB

After

Width:  |  Height:  |  Size: 7.2 KiB

6
logs/expt_2k.log Normal file
View File

@ -0,0 +1,6 @@
521 736
607 552
1279 112
2203 33
3217 13
4253 6

View File

@ -1,7 +1,7 @@
14896 1088
21952 468
29008 244
43120 91
58016 43
86240 15
115248 6
532 1064
784 460
1036 240
1540 91
2072 43
3080 15
4116 6

View File

@ -1,5 +1,5 @@
set terminal png color
set size 1.5
set size 1.75
set ylabel "Operations per Second"
set xlabel "Operand size (bits)"
@ -10,7 +10,7 @@ set output "mult.png"
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
set output "expt.png"
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)"
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)", 'expt_2k.log' smooth bezier title "Exptmod (2k Reduction)"
set output "invmod.png"
plot 'invmod.log' smooth bezier title "Modular Inverse"

View File

@ -1,32 +1,32 @@
112 15608
224 7840
336 5104
448 3376
560 2616
672 1984
784 1640
896 2056
1008 1136
1120 936
1232 1240
1344 1112
1456 608
1568 873
1680 492
1792 444
1904 640
2016 584
2128 328
2240 307
2352 283
2464 256
2576 393
2688 365
2800 344
2912 196
3024 301
3136 170
3248 160
3360 250
3472 144
3584 224
112 16248
224 8192
336 5320
448 3560
560 2728
672 2064
784 1704
896 2176
1008 1184
1120 976
1232 1280
1344 1176
1456 624
1568 912
1680 504
1792 452
1904 658
2016 608
2128 336
2240 312
2352 288
2464 264
2576 408
2688 376
2800 354
2912 198
3024 307
3136 173
3248 162
3360 256
3472 145
3584 226

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.7 KiB

After

Width:  |  Height:  |  Size: 5.6 KiB

13
logs/k7/README Normal file
View File

@ -0,0 +1,13 @@
To use the pretty graphs you have to first build/run the ltmtest from the root directory of the package.
Todo this type
make timing ; ltmtest
in the root. It will run for a while [about ten minutes on most PCs] and produce a series of .log files in logs/.
After doing that run "gnuplot graphs.dem" to make the PNGs. If you managed todo that all so far just open index.html to view
them all :-)
Have fun
Tom

16
logs/k7/add.log Normal file
View File

@ -0,0 +1,16 @@
224 11069160
448 9156136
672 8089755
896 7399424
1120 6389352
1344 5818648
1568 5257112
1792 4982160
2016 4527856
2240 4325312
2464 4051760
2688 3767640
2912 3612520
3136 3415208
3360 3258656
3584 3113360

BIN
logs/k7/addsub.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

7
logs/k7/expt.log Normal file
View File

@ -0,0 +1,7 @@
513 664
769 256
1025 117
2049 17
2561 9
3073 5
4097 2

BIN
logs/k7/expt.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.3 KiB

7
logs/k7/expt_dr.log Normal file
View File

@ -0,0 +1,7 @@
532 1088
784 460
1036 240
1540 92
2072 43
3080 15
4116 6

17
logs/k7/graphs.dem Normal file
View File

@ -0,0 +1,17 @@
set terminal png color
set size 1.75
set ylabel "Operations per Second"
set xlabel "Operand size (bits)"
set output "addsub.png"
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
set output "mult.png"
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
set output "expt.png"
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)"
set output "invmod.png"
plot 'invmod.log' smooth bezier title "Modular Inverse"

24
logs/k7/index.html Normal file
View File

@ -0,0 +1,24 @@
<html>
<head>
<title>LibTomMath Log Plots</title>
</head>
<body>
<h1>Addition and Subtraction</h1>
<center><img src=addsub.png></center>
<hr>
<h1>Multipliers</h1>
<center><img src=mult.png></center>
<hr>
<h1>Exptmod</h1>
<center><img src=expt.png></center>
<hr>
<h1>Modular Inverse</h1>
<center><img src=invmod.png></center>
<hr>
</body>
</html>

32
logs/k7/invmod.log Normal file
View File

@ -0,0 +1,32 @@
112 16248
224 8192
336 5320
448 3560
560 2728
672 2064
784 1704
896 2176
1008 1184
1120 976
1232 1280
1344 1176
1456 624
1568 912
1680 504
1792 452
1904 658
2016 608
2128 336
2240 312
2352 288
2464 264
2576 408
2688 376
2800 354
2912 198
3024 307
3136 173
3248 162
3360 256
3472 145
3584 226

BIN
logs/k7/invmod.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.6 KiB

17
logs/k7/mult.log Normal file
View File

@ -0,0 +1,17 @@
896 322904
1344 151592
1792 90472
2240 59984
2688 42624
3136 31872
3584 24704
4032 19704
4480 16096
4928 13376
5376 11272
5824 9616
6272 8360
6720 7304
7168 1664
7616 1472
8064 1328

BIN
logs/k7/mult.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.1 KiB

17
logs/k7/mult_kara.log Normal file
View File

@ -0,0 +1,17 @@
896 322872
1344 151688
1792 90480
2240 59984
2688 42656
3136 32144
3584 25840
4032 21328
4480 17856
4928 14928
5376 12856
5824 11256
6272 9880
6720 8984
7168 7928
7616 7200
8064 6576

17
logs/k7/sqr.log Normal file
View File

@ -0,0 +1,17 @@
896 415472
1344 223736
1792 141232
2240 97624
2688 71400
3136 54800
3584 16904
4032 13528
4480 10968
4928 9128
5376 7784
5824 6672
6272 5760
6720 5056
7168 4440
7616 3952
8064 3512

17
logs/k7/sqr_kara.log Normal file
View File

@ -0,0 +1,17 @@
896 420464
1344 224800
1792 142808
2240 97704
2688 71416
3136 54504
3584 38320
4032 32360
4480 27576
4928 23840
5376 20688
5824 18264
6272 16176
6720 14440
7168 11688
7616 10752
8064 9936

16
logs/k7/sub.log Normal file
View File

@ -0,0 +1,16 @@
224 9728504
448 8573648
672 7488096
896 6714064
1120 5950472
1344 5457400
1568 5038896
1792 4683632
2016 4384656
2240 4105976
2464 3871608
2688 3650680
2912 3463552
3136 3290016
3360 3135272
3584 2993848

View File

@ -1,17 +1,17 @@
896 321504
1344 150784
1792 90288
2240 59760
2688 42480
3136 32056
3584 24600
4032 19656
4480 16024
4928 13328
5376 11280
5824 9624
6272 8336
6720 7280
7168 1648
7616 1464
8064 1296
896 322904
1344 151592
1792 90472
2240 59984
2688 42624
3136 31872
3584 24704
4032 19704
4480 16096
4928 13376
5376 11272
5824 9616
6272 8360
6720 7304
7168 1664
7616 1472
8064 1328

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.9 KiB

After

Width:  |  Height:  |  Size: 8.1 KiB

View File

@ -1,17 +1,17 @@
896 321928
1344 150752
1792 90136
2240 59888
2688 42480
3136 32080
3584 25744
4032 21216
4480 17912
4928 14896
5376 12936
5824 11216
6272 9848
6720 8896
7168 7968
7616 7248
8064 6600
896 322872
1344 151688
1792 90480
2240 59984
2688 42656
3136 32144
3584 25840
4032 21328
4480 17856
4928 14928
5376 12856
5824 11256
6272 9880
6720 8984
7168 7928
7616 7200
8064 6576

13
logs/p4/README Normal file
View File

@ -0,0 +1,13 @@
To use the pretty graphs you have to first build/run the ltmtest from the root directory of the package.
Todo this type
make timing ; ltmtest
in the root. It will run for a while [about ten minutes on most PCs] and produce a series of .log files in logs/.
After doing that run "gnuplot graphs.dem" to make the PNGs. If you managed todo that all so far just open index.html to view
them all :-)
Have fun
Tom

16
logs/p4/add.log Normal file
View File

@ -0,0 +1,16 @@
224 8113248
448 6585584
672 5687678
896 4761144
1120 4111592
1344 3995154
1568 3532387
1792 3225400
2016 2963960
2240 2720112
2464 2533952
2688 2307168
2912 2287064
3136 2150160
3360 2035992
3584 1936304

BIN
logs/p4/addsub.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.7 KiB

7
logs/p4/expt.log Normal file
View File

@ -0,0 +1,7 @@
513 195
769 68
1025 31
2049 4
2561 2
3073 1
4097 0

BIN
logs/p4/expt.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.4 KiB

7
logs/p4/expt_dr.log Normal file
View File

@ -0,0 +1,7 @@
532 393
784 158
1036 79
1540 27
2072 12
3080 4
4116 1

17
logs/p4/graphs.dem Normal file
View File

@ -0,0 +1,17 @@
set terminal png color
set size 1.75
set ylabel "Operations per Second"
set xlabel "Operand size (bits)"
set output "addsub.png"
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
set output "mult.png"
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
set output "expt.png"
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)"
set output "invmod.png"
plot 'invmod.log' smooth bezier title "Modular Inverse"

24
logs/p4/index.html Normal file
View File

@ -0,0 +1,24 @@
<html>
<head>
<title>LibTomMath Log Plots</title>
</head>
<body>
<h1>Addition and Subtraction</h1>
<center><img src=addsub.png></center>
<hr>
<h1>Multipliers</h1>
<center><img src=mult.png></center>
<hr>
<h1>Exptmod</h1>
<center><img src=expt.png></center>
<hr>
<h1>Modular Inverse</h1>
<center><img src=invmod.png></center>
<hr>
</body>
</html>

32
logs/p4/invmod.log Normal file
View File

@ -0,0 +1,32 @@
112 13608
224 6872
336 4264
448 2792
560 2144
672 1560
784 1296
896 1672
1008 896
1120 736
1232 1024
1344 888
1456 472
1568 680
1680 373
1792 328
1904 484
2016 436
2128 232
2240 211
2352 200
2464 177
2576 293
2688 262
2800 251
2912 137
3024 216
3136 117
3248 113
3360 181
3472 98
3584 158

BIN
logs/p4/invmod.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

17
logs/p4/mult.log Normal file
View File

@ -0,0 +1,17 @@
896 77600
1344 35776
1792 19688
2240 13248
2688 9424
3136 7056
3584 5464
4032 4368
4480 3568
4928 2976
5376 2520
5824 2152
6272 1872
6720 1632
7168 650
7616 576
8064 515

BIN
logs/p4/mult.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.9 KiB

17
logs/p4/mult_kara.log Normal file
View File

@ -0,0 +1,17 @@
896 77752
1344 35832
1792 19688
2240 14704
2688 10832
3136 8336
3584 6600
4032 5424
4480 4648
4928 3976
5376 3448
5824 3016
6272 2664
6720 2384
7168 2120
7616 1912
8064 1752

17
logs/p4/sqr.log Normal file
View File

@ -0,0 +1,17 @@
896 128088
1344 63640
1792 37968
2240 25488
2688 18176
3136 13672
3584 4920
4032 3912
4480 3160
4928 2616
5376 2216
5824 1896
6272 1624
6720 1408
7168 1240
7616 1096
8064 984

17
logs/p4/sqr_kara.log Normal file
View File

@ -0,0 +1,17 @@
896 127456
1344 63752
1792 37920
2240 25440
2688 18200
3136 13728
3584 10968
4032 9072
4480 7608
4928 6440
5376 5528
5824 4768
6272 4328
6720 3888
7168 3504
7616 3176
8064 2896

16
logs/p4/sub.log Normal file
View File

@ -0,0 +1,16 @@
224 7355896
448 6162880
672 5218984
896 4622776
1120 3999320
1344 3629480
1568 3290384
1792 2954752
2016 2737056
2240 2563320
2464 2451928
2688 2310920
2912 2139048
3136 2034080
3360 1890800
3584 1808624

View File

@ -1,17 +1,17 @@
896 416968
1344 223672
1792 141552
2240 97280
2688 71304
3136 54648
3584 16264
4032 13000
4480 10528
4928 8776
5376 7464
5824 6440
6272 5520
6720 4808
7168 4264
7616 3784
8064 3368
896 415472
1344 223736
1792 141232
2240 97624
2688 71400
3136 54800
3584 16904
4032 13528
4480 10968
4928 9128
5376 7784
5824 6672
6272 5760
6720 5056
7168 4440
7616 3952
8064 3512

View File

@ -1,17 +1,17 @@
896 416656
1344 223728
1792 141288
2240 97456
2688 71152
3136 54392
3584 38552
4032 32216
4480 27384
4928 23792
5376 20728
5824 18232
6272 16160
6720 14408
7168 11696
7616 10768
8064 9920
896 420464
1344 224800
1792 142808
2240 97704
2688 71416
3136 54504
3584 38320
4032 32360
4480 27576
4928 23840
5376 20688
5824 18264
6272 16176
6720 14440
7168 11688
7616 10752
8064 9936

View File

@ -1,16 +1,16 @@
224 9862520
448 8562344
672 7661400
896 6838128
1120 5911144
1344 5394040
1568 4993760
1792 4624240
2016 4332024
2240 4029312
2464 3790784
2688 3587216
2912 3397952
3136 3239736
3360 3080616
3584 2933104
224 9728504
448 8573648
672 7488096
896 6714064
1120 5950472
1344 5457400
1568 5038896
1792 4683632
2016 4384656
2240 4105976
2464 3871608
2688 3650680
2912 3463552
3136 3290016
3360 3135272
3584 2993848

View File

@ -1,6 +1,6 @@
CFLAGS += -I./ -Wall -W -Wshadow -O3 -fomit-frame-pointer -funroll-loops
VERSION=0.17
VERSION=0.18
default: libtommath.a
@ -33,7 +33,9 @@ bn_mp_to_signed_bin.o bn_mp_unsigned_bin_size.o bn_mp_signed_bin_size.o bn_radix
bn_mp_xor.o bn_mp_and.o bn_mp_or.o bn_mp_rand.o bn_mp_montgomery_calc_normalization.o \
bn_mp_prime_is_divisible.o bn_prime_tab.o bn_mp_prime_fermat.o bn_mp_prime_miller_rabin.o \
bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o bn_mp_multi.o \
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o bn_mp_reduce_setup.o \
bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \
bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o
libtommath.a: $(OBJECTS)
$(AR) $(ARFLAGS) libtommath.a $(OBJECTS)
@ -63,6 +65,11 @@ docdvi: tommath.src
makeindex tommath
latex tommath > /dev/null
# poster, makes the single page PDF poster
poster: poster.tex
pdflatex poster
rm -f poster.aux poster.log
# makes the LTM book PS/PDF file, requires tetex, cleans up the LaTeX temp files
docs:
cd pics ; make pdfes
@ -88,11 +95,12 @@ manual:
clean:
rm -f *.pdf *.o *.a *.obj *.lib *.exe etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \
tommath.idx tommath.toc tommath.log tommath.aux tommath.dvi tommath.lof tommath.ind tommath.ilg *.ps *.pdf *.log *.s mpi.c
tommath.idx tommath.toc tommath.log tommath.aux tommath.dvi tommath.lof tommath.ind tommath.ilg *.ps *.pdf *.log *.s mpi.c \
poster.aux poster.dvi poster.log
cd etc ; make clean
cd pics ; make clean
zipup: clean manual
zipup: clean manual poster
perl gen.pl ; mv mpi.c pre_gen/ ; \
cd .. ; rm -rf ltm* libtommath-$(VERSION) ; mkdir libtommath-$(VERSION) ; \
cp -R ./libtommath/* ./libtommath-$(VERSION)/ ; tar -c libtommath-$(VERSION)/* > ltm-$(VERSION).tar ; \

37
makefile.bcc Normal file
View File

@ -0,0 +1,37 @@
#
# Borland C++Builder Makefile (makefile.bcc)
#
LIB = tlib
CC = bcc32
CFLAGS = -c -O2 -I.
OBJECTS=bncore.obj bn_mp_init.obj bn_mp_clear.obj bn_mp_exch.obj bn_mp_grow.obj bn_mp_shrink.obj \
bn_mp_clamp.obj bn_mp_zero.obj bn_mp_set.obj bn_mp_set_int.obj bn_mp_init_size.obj bn_mp_copy.obj \
bn_mp_init_copy.obj bn_mp_abs.obj bn_mp_neg.obj bn_mp_cmp_mag.obj bn_mp_cmp.obj bn_mp_cmp_d.obj \
bn_mp_rshd.obj bn_mp_lshd.obj bn_mp_mod_2d.obj bn_mp_div_2d.obj bn_mp_mul_2d.obj bn_mp_div_2.obj \
bn_mp_mul_2.obj bn_s_mp_add.obj bn_s_mp_sub.obj bn_fast_s_mp_mul_digs.obj bn_s_mp_mul_digs.obj \
bn_fast_s_mp_mul_high_digs.obj bn_s_mp_mul_high_digs.obj bn_fast_s_mp_sqr.obj bn_s_mp_sqr.obj \
bn_mp_add.obj bn_mp_sub.obj bn_mp_karatsuba_mul.obj bn_mp_mul.obj bn_mp_karatsuba_sqr.obj \
bn_mp_sqr.obj bn_mp_div.obj bn_mp_mod.obj bn_mp_add_d.obj bn_mp_sub_d.obj bn_mp_mul_d.obj \
bn_mp_div_d.obj bn_mp_mod_d.obj bn_mp_expt_d.obj bn_mp_addmod.obj bn_mp_submod.obj \
bn_mp_mulmod.obj bn_mp_sqrmod.obj bn_mp_gcd.obj bn_mp_lcm.obj bn_fast_mp_invmod.obj bn_mp_invmod.obj \
bn_mp_reduce.obj bn_mp_montgomery_setup.obj bn_fast_mp_montgomery_reduce.obj bn_mp_montgomery_reduce.obj \
bn_mp_exptmod_fast.obj bn_mp_exptmod.obj bn_mp_2expt.obj bn_mp_n_root.obj bn_mp_jacobi.obj bn_reverse.obj \
bn_mp_count_bits.obj bn_mp_read_unsigned_bin.obj bn_mp_read_signed_bin.obj bn_mp_to_unsigned_bin.obj \
bn_mp_to_signed_bin.obj bn_mp_unsigned_bin_size.obj bn_mp_signed_bin_size.obj bn_radix.obj \
bn_mp_xor.obj bn_mp_and.obj bn_mp_or.obj bn_mp_rand.obj bn_mp_montgomery_calc_normalization.obj \
bn_mp_prime_is_divisible.obj bn_prime_tab.obj bn_mp_prime_fermat.obj bn_mp_prime_miller_rabin.obj \
bn_mp_prime_is_prime.obj bn_mp_prime_next_prime.obj bn_mp_dr_reduce.obj bn_mp_multi.obj \
bn_mp_dr_is_modulus.obj bn_mp_dr_setup.obj bn_mp_reduce_setup.obj \
bn_mp_toom_mul.obj bn_mp_toom_sqr.obj bn_mp_div_3.obj bn_s_mp_exptmod.obj \
bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj
TARGET = libtommath.lib
$(TARGET): $(OBJECTS)
.c.objbj:
$(CC) $(CFLAGS) $<
$(LIB) $(TARGET) -+$@

View File

@ -23,7 +23,10 @@ bn_mp_to_signed_bin.obj bn_mp_unsigned_bin_size.obj bn_mp_signed_bin_size.obj bn
bn_mp_xor.obj bn_mp_and.obj bn_mp_or.obj bn_mp_rand.obj bn_mp_montgomery_calc_normalization.obj \
bn_mp_prime_is_divisible.obj bn_prime_tab.obj bn_mp_prime_fermat.obj bn_mp_prime_miller_rabin.obj \
bn_mp_prime_is_prime.obj bn_mp_prime_next_prime.obj bn_mp_dr_reduce.obj bn_mp_multi.obj \
bn_mp_dr_is_modulus.obj bn_mp_dr_setup.obj
bn_mp_dr_is_modulus.obj bn_mp_dr_setup.obj bn_mp_reduce_setup.obj \
bn_mp_toom_mul.obj bn_mp_toom_sqr.obj bn_mp_div_3.obj bn_s_mp_exptmod.obj \
bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj
library: $(OBJECTS)

View File

@ -40,14 +40,10 @@ void rand_num(mp_int *a)
int n, size;
unsigned char buf[2048];
top:
size = 1 + ((fgetc(rng)*fgetc(rng)) % 1024);
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 1031;
buf[0] = (fgetc(rng)&1)?1:0;
fread(buf+1, 1, size, rng);
for (n = 0; n < size; n++) {
if (buf[n+1]) break;
}
if (n == size) goto top;
while (buf[1] == 0) buf[1] = fgetc(rng);
mp_read_raw(a, buf, 1+size);
}
@ -56,14 +52,10 @@ void rand_num2(mp_int *a)
int n, size;
unsigned char buf[2048];
top:
size = 1 + ((fgetc(rng)*fgetc(rng)) % 96);
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 97;
buf[0] = (fgetc(rng)&1)?1:0;
fread(buf+1, 1, size, rng);
for (n = 0; n < size; n++) {
if (buf[n+1]) break;
}
if (n == size) goto top;
while (buf[1] == 0) buf[1] = fgetc(rng);
mp_read_raw(a, buf, 1+size);
}
@ -73,6 +65,7 @@ int main(void)
{
int n;
mp_int a, b, c, d, e;
clock_t t1;
char buf[4096];
mp_init(&a);
@ -108,8 +101,14 @@ int main(void)
}
}
t1 = clock();
for (;;) {
n = fgetc(rng) % 13;
if (clock() - t1 > CLOCKS_PER_SEC) {
sleep(1);
t1 = clock();
}
n = fgetc(rng) % 13;
if (n == 0) {
/* add tests */
@ -227,6 +226,7 @@ int main(void)
rand_num2(&a);
rand_num2(&b);
rand_num2(&c);
// if (c.dp[0]&1) mp_add_d(&c, 1, &c);
a.sign = b.sign = c.sign = 0;
mp_exptmod(&a, &b, &c, &d);
printf("expt\n");

BIN
pics/expt_state.sxd Normal file

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More