added libtommath-0.17

This commit is contained in:
Tom St Denis 2003-05-17 12:33:54 +00:00 committed by Steffen Jaeckel
parent 14161e843e
commit fd181cc841
87 changed files with 14780 additions and 6958 deletions

BIN
bn.pdf

Binary file not shown.

4
bn.tex
View File

@ -1,7 +1,7 @@
\documentclass[]{report}
\documentclass[]{article}
\begin{document}
\title{LibTomMath v0.16 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org }
\title{LibTomMath v0.17 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org }
\author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle
\newpage

View File

@ -27,41 +27,18 @@ fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
int res, neg;
/* init all our temps */
if ((res = mp_init (&x)) != MP_OKAY) {
goto __ERR;
}
if ((res = mp_init (&y)) != MP_OKAY) {
goto __X;
}
if ((res = mp_init (&u)) != MP_OKAY) {
goto __Y;
}
if ((res = mp_init (&v)) != MP_OKAY) {
goto __U;
}
if ((res = mp_init (&B)) != MP_OKAY) {
goto __V;
}
if ((res = mp_init (&D)) != MP_OKAY) {
goto __B;
if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
return res;
}
/* x == modulus, y == value to invert */
if ((res = mp_copy (b, &x)) != MP_OKAY) {
goto __D;
}
if ((res = mp_copy (a, &y)) != MP_OKAY) {
goto __D;
goto __ERR;
}
/* we need |y| */
if ((res = mp_abs (&y, &y)) != MP_OKAY) {
goto __D;
/* we need y = |a| */
if ((res = mp_abs (a, &y)) != MP_OKAY) {
goto __ERR;
}
/* 2. [modified] if x,y are both even then return an error!
@ -70,15 +47,15 @@ fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
*/
if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
res = MP_VAL;
goto __D;
goto __ERR;
}
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
if ((res = mp_copy (&x, &u)) != MP_OKAY) {
goto __D;
goto __ERR;
}
if ((res = mp_copy (&y, &v)) != MP_OKAY) {
goto __D;
goto __ERR;
}
mp_set (&D, 1);
@ -87,17 +64,17 @@ top:
while (mp_iseven (&u) == 1) {
/* 4.1 u = u/2 */
if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
goto __D;
goto __ERR;
}
/* 4.2 if A or B is odd then */
if (mp_iseven (&B) == 0) {
if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
goto __D;
goto __ERR;
}
}
/* B = B/2 */
if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
goto __D;
goto __ERR;
}
}
@ -105,18 +82,18 @@ top:
while (mp_iseven (&v) == 1) {
/* 5.1 v = v/2 */
if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
goto __D;
goto __ERR;
}
/* 5.2 if C or D is odd then */
if (mp_iseven (&D) == 0) {
/* D = (D-x)/2 */
if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
goto __D;
goto __ERR;
}
}
/* D = D/2 */
if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
goto __D;
goto __ERR;
}
}
@ -124,20 +101,20 @@ top:
if (mp_cmp (&u, &v) != MP_LT) {
/* u = u - v, B = B - D */
if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
goto __D;
goto __ERR;
}
if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
goto __D;
goto __ERR;
}
} else {
/* v = v - u, D = D - B */
if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
goto __D;
goto __ERR;
}
if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
goto __D;
goto __ERR;
}
}
@ -151,26 +128,20 @@ top:
/* if v != 1 then there is no inverse */
if (mp_cmp_d (&v, 1) != MP_EQ) {
res = MP_VAL;
goto __D;
goto __ERR;
}
/* b is now the inverse */
neg = a->sign;
while (D.sign == MP_NEG) {
if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
goto __D;
goto __ERR;
}
}
mp_exch (&D, c);
c->sign = neg;
res = MP_OKAY;
__D:mp_clear (&D);
__B:mp_clear (&B);
__V:mp_clear (&v);
__U:mp_clear (&u);
__Y:mp_clear (&y);
__X:mp_clear (&x);
__ERR:
__ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
return res;
}

View File

@ -26,7 +26,7 @@ int
fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
{
int ix, res, olduse;
mp_word W[512];
mp_word W[MP_WARRAY];
/* get old used count */
olduse = a->used;
@ -92,7 +92,7 @@ fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
/* inner loop */
for (iy = 0; iy < m->used; iy++) {
*_W++ += ((mp_word) ui) * ((mp_word) * tmpx++);
*_W++ += ((mp_word) ui) * ((mp_word) * tmpx++);
}
}

View File

@ -16,14 +16,16 @@
/* Fast (comba) multiplier
*
* This is the fast column-array [comba] multiplier. It is designed to compute
* the columns of the product first then handle the carries afterwards. This
* has the effect of making the nested loops that compute the columns very
* This is the fast column-array [comba] multiplier. It is
* designed to compute the columns of the product first
* then handle the carries afterwards. This has the effect
* of making the nested loops that compute the columns very
* simple and schedulable on super-scalar processors.
*
* This has been modified to produce a variable number of digits of output so
* if say only a half-product is required you don't have to compute the upper half
* (a feature required for fast Barrett reduction).
* This has been modified to produce a variable number of
* digits of output so if say only a half-product is required
* you don't have to compute the upper half (a feature
* required for fast Barrett reduction).
*
* Based on Algorithm 14.12 on pp.595 of HAC.
*
@ -32,7 +34,7 @@ int
fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
int olduse, res, pa, ix;
mp_word W[512];
mp_word W[MP_WARRAY];
/* grow the destination as required */
if (c->alloc < digs) {
@ -47,10 +49,9 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* calculate the columns */
pa = a->used;
for (ix = 0; ix < pa; ix++) {
/* this multiplier has been modified to allow you to control how many digits
* of output are produced. So at most we want to make upto "digs" digits
* of output.
/* this multiplier has been modified to allow you to
* control how many digits of output are produced.
* So at most we want to make upto "digs" digits of output.
*
* this adds products to distinct columns (at ix+iy) of W
* note that each step through the loop is not dependent on
@ -73,14 +74,14 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
*/
_W = W + ix;
/* the number of digits is limited by their placement. E.g.
/* the number of digits is limited by their placement. E.g.
we avoid multiplying digits that will end up above the # of
digits of precision requested
*/
pb = MIN (b->used, digs - ix);
for (iy = 0; iy < pb; iy++) {
*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
}
}
@ -97,11 +98,12 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
* correct result we must take the extra bits from each column and
* carry them down
*
* Note that while this adds extra code to the multiplier it saves time
* since the carry propagation is removed from the above nested loop.
* This has the effect of reducing the work from N*(N+N*c)==N^2 + c*N^2 to
* N^2 + N*c where c is the cost of the shifting. On very small numbers
* this is slower but on most cryptographic size numbers it is faster.
* Note that while this adds extra code to the multiplier it
* saves time since the carry propagation is removed from the
* above nested loop. This has the effect of reducing the work
* from N*(N+N*c)==N**2 + c*N**2 to N**2 + N*c where c is the
* cost of the shifting. On very small numbers this is slower
* but on most cryptographic size numbers it is faster.
*/
tmpc = c->dp;
for (ix = 1; ix < digs; ix++) {

View File

@ -27,7 +27,7 @@ int
fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
int oldused, newused, res, pa, pb, ix;
mp_word W[512];
mp_word W[MP_WARRAY];
/* calculate size of product and allocate more space if required */
newused = a->used + b->used + 1;
@ -55,15 +55,23 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* alias for right side */
tmpy = b->dp + iy;
/* alias for the columns of output. Offset to be equal to or above the
* smallest digit place requested
*/
_W = &(W[digs]);
_W = W + digs;
/* skip cases below zero where ix > digs */
if (iy < 0) {
iy = abs(iy);
tmpy += iy;
_W += iy;
iy = 0;
}
/* compute column products for digits above the minimum */
for (; iy < pb; iy++) {
*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
}
}
}

View File

@ -20,7 +20,7 @@
* then the carries are computed. This has the effect of making a very simple
* inner loop that is executed the most
*
* W2 represents the outer products and W the inner.
* W2 represents the outer products and W the inner.
*
* A further optimization is made because the inner products are of the form
* "A * B * 2". The *2 part does not need to be computed until the end which is
@ -33,7 +33,7 @@ int
fast_s_mp_sqr (mp_int * a, mp_int * b)
{
int olduse, newused, res, ix, pa;
mp_word W2[512], W[512];
mp_word W2[MP_WARRAY], W[MP_WARRAY];
/* calculate size of product and allocate as required */
pa = a->used;
@ -44,9 +44,9 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
}
}
/* zero temp buffer (columns)
/* zero temp buffer (columns)
* Note that there are two buffers. Since squaring requires
* a outter and inner product and the inner product requires
* a outter and inner product and the inner product requires
* computing a product and doubling it (a relatively expensive
* op to perform n^2 times if you don't have to) the inner and
* outer products are computed in different buffers. This way
@ -60,7 +60,7 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
* values in W2 are only written in even locations which means
* we can collapse the array to 256 words [and fixup the memset above]
* provided we also fix up the summations below. Ideally
* the fixup loop should be unrolled twice to handle the even/odd
* the fixup loop should be unrolled twice to handle the even/odd
* cases, and then a final step to handle odd cases [e.g. newused == odd]
*
* This will not only save ~8*256 = 2KB of stack but lower the number of
@ -71,10 +71,10 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
* the multiplication by two is done afterwards in the N loop.
*/
for (ix = 0; ix < pa; ix++) {
/* compute the outer product
/* compute the outer product
*
* Note that every outer product is computed
* for a particular column only once which means that
* Note that every outer product is computed
* for a particular column only once which means that
* there is no need to do a double precision addition
*/
W2[ix + ix] = ((mp_word) a->dp[ix]) * ((mp_word) a->dp[ix]);
@ -95,7 +95,7 @@ fast_s_mp_sqr (mp_int * a, mp_int * b)
/* inner products */
for (iy = ix + 1; iy < pa; iy++) {
*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
*_W++ += ((mp_word) tmpx) * ((mp_word) * tmpy++);
}
}
}

View File

@ -24,33 +24,25 @@ mp_add (mp_int * a, mp_int * b, mp_int * c)
sa = a->sign;
sb = b->sign;
/* handle four cases */
if (sa == MP_ZPOS && sb == MP_ZPOS) {
/* both positive */
/* handle two cases, not four */
if (sa == sb) {
/* both positive or both negative */
/* add their magnitudes, copy the sign */
c->sign = sa;
res = s_mp_add (a, b, c);
c->sign = MP_ZPOS;
} else if (sa == MP_ZPOS && sb == MP_NEG) {
/* a + -b == a - b, but if b>a then we do it as -(b-a) */
if (mp_cmp_mag (a, b) == MP_LT) {
res = s_mp_sub (b, a, c);
c->sign = MP_NEG;
} else {
res = s_mp_sub (a, b, c);
c->sign = MP_ZPOS;
}
} else if (sa == MP_NEG && sb == MP_ZPOS) {
/* -a + b == b - a, but if a>b then we do it as -(a-b) */
if (mp_cmp_mag (a, b) == MP_GT) {
res = s_mp_sub (a, b, c);
c->sign = MP_NEG;
} else {
res = s_mp_sub (b, a, c);
c->sign = MP_ZPOS;
}
} else {
/* -a + -b == -(a + b) */
res = s_mp_add (a, b, c);
c->sign = MP_NEG;
/* one positive, the other negative */
/* subtract the one with the greater magnitude from */
/* the one of the lesser magnitude. The result gets */
/* the sign of the one with the greater magnitude. */
if (mp_cmp_mag (a, b) == MP_LT) {
c->sign = sb;
res = s_mp_sub (b, a, c);
} else {
c->sign = sa;
res = s_mp_sub (a, b, c);
}
}
return res;
}

View File

@ -21,8 +21,17 @@ mp_cmp (mp_int * a, mp_int * b)
/* compare based on sign */
if (a->sign == MP_NEG && b->sign == MP_ZPOS) {
return MP_LT;
} else if (a->sign == MP_ZPOS && b->sign == MP_NEG) {
}
if (a->sign == MP_ZPOS && b->sign == MP_NEG) {
return MP_GT;
}
return mp_cmp_mag (a, b);
/* compare digits */
if (a->sign == MP_NEG) {
/* if negative compare opposite direction */
return mp_cmp_mag(b, a);
} else {
return mp_cmp_mag(a, b);
}
}

View File

@ -23,7 +23,9 @@ mp_cmp_mag (mp_int * a, mp_int * b)
/* compare based on # of non-zero digits */
if (a->used > b->used) {
return MP_GT;
} else if (a->used < b->used) {
}
if (a->used < b->used) {
return MP_LT;
}
@ -31,7 +33,9 @@ mp_cmp_mag (mp_int * a, mp_int * b)
for (n = a->used - 1; n >= 0; n--) {
if (a->dp[n] > b->dp[n]) {
return MP_GT;
} else if (a->dp[n] < b->dp[n]) {
}
if (a->dp[n] < b->dp[n]) {
return MP_LT;
}
}

View File

@ -31,13 +31,10 @@ mp_copy (mp_int * a, mp_int * b)
}
/* zero b and copy the parameters over */
b->used = a->used;
b->sign = a->sign;
{
register mp_digit *tmpa, *tmpb;
/* point aliases */
/* pointer aliases */
tmpa = a->dp;
tmpb = b->dp;
@ -47,9 +44,11 @@ mp_copy (mp_int * a, mp_int * b)
}
/* clear high digits */
for (; n < b->alloc; n++) {
for (; n < b->used; n++) {
*tmpb++ = 0;
}
}
b->used = a->used;
b->sign = a->sign;
return MP_OKAY;
}

View File

@ -75,7 +75,7 @@ mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
/* normalize both x and y, ensure that y >= b/2, [b == 2^DIGIT_BIT] */
norm = mp_count_bits(&y) % DIGIT_BIT;
if (norm < (DIGIT_BIT-1)) {
if (norm < (int)(DIGIT_BIT-1)) {
norm = (DIGIT_BIT-1) - norm;
if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
goto __Y;
@ -86,13 +86,13 @@ mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
} else {
norm = 0;
}
/* note hac does 0 based, so if used==5 then it's 0,1,2,3,4, e.g. use 4 */
n = x.used - 1;
t = y.used - 1;
/* step 2. while (x >= y*b^n-t) do { q[n-t] += 1; x -= y*b^{n-t} } */
if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b^{n-t} */
if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b^{n-t} */
goto __Y;
}
@ -113,14 +113,14 @@ mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
/* step 3.1 if xi == yt then set q{i-t-1} to b-1, otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
if (x.dp[i] == y.dp[t]) {
q.dp[i - t - 1] = ((1UL << DIGIT_BIT) - 1UL);
q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
} else {
mp_word tmp;
tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
tmp |= ((mp_word) x.dp[i - 1]);
tmp /= ((mp_word) y.dp[t]);
if (tmp > (mp_word) MP_MASK)
tmp = MP_MASK;
tmp = MP_MASK;
q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
}
@ -135,7 +135,7 @@ mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
t1.dp[1] = y.dp[t];
t1.used = 2;
if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
goto __Y;
goto __Y;
}
/* find right hand */
@ -143,7 +143,7 @@ mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
t2.dp[2] = x.dp[i];
t2.used = 3;
} while (mp_cmp (&t1, &t2) == MP_GT);
} while (mp_cmp_mag(&t1, &t2) == MP_GT);
/* step 3.3 x = x - q{i-t-1} * y * b^{i-t-1} */
if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
@ -161,19 +161,19 @@ mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
/* step 3.4 if x < 0 then { x = x + y*b^{i-t-1}; q{i-t-1} -= 1; } */
if (x.sign == MP_NEG) {
if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
goto __Y;
goto __Y;
}
if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
goto __Y;
goto __Y;
}
if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
goto __Y;
goto __Y;
}
q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
}
}
/* now q is the quotient and x is the remainder [which we have to normalize] */
/* get sign before writing to c */
x.sign = a->sign;

View File

@ -34,19 +34,19 @@ mp_div_2 (mp_int * a, mp_int * b)
/* source alias */
tmpa = a->dp + b->used - 1;
/* dest alias */
tmpb = b->dp + b->used - 1;
/* carry */
r = 0;
for (x = b->used - 1; x >= 0; x--) {
/* get the carry for the next iteration */
rr = *tmpa & 1;
/* shift the current digit, add in carry and store */
*tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
/* forward carry to next iteration */
r = rr;
}

View File

@ -51,7 +51,7 @@ mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
}
/* shift by as many digits in the bit count */
if (b >= DIGIT_BIT) {
if (b >= (int)DIGIT_BIT) {
mp_rshd (c, b / DIGIT_BIT);
}
@ -59,13 +59,13 @@ mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
D = (mp_digit) (b % DIGIT_BIT);
if (D != 0) {
register mp_digit *tmpc, mask;
/* mask */
mask = (1U << D) - 1U;
mask = (((mp_digit)1) << D) - 1;
/* alias */
tmpc = c->dp + (c->used - 1);
/* carry */
r = 0;
for (x = c->used - 1; x >= 0; x--) {

34
bn_mp_dr_is_modulus.c Normal file
View File

@ -0,0 +1,34 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is a library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* Reports whether "a" is usable as a diminished radix (DR) modulus.
 *
 * Returns 1 when "a" has at least two digits and every digit above
 * the lowest one equals MP_MASK; returns 0 otherwise.  The lowest
 * digit (a->dp[0]) is never inspected.
 */
int mp_dr_is_modulus(mp_int *a)
{
   int pos = 1;

   /* values with fewer than two digits are rejected outright */
   if (a->used <= 1) {
      return 0;
   }

   /* scan digits 1 .. used-1; the first one that is not MP_MASK disqualifies */
   while (pos < a->used) {
      if (a->dp[pos] != MP_MASK) {
         return 0;
      }
      ++pos;
   }
   return 1;
}

View File

@ -16,7 +16,7 @@
/* reduce "a" in place modulo "b" using the Diminished Radix algorithm.
*
* Based on algorithm from the paper
* Based on algorithm from the paper
*
* "Generating Efficient Primes for Discrete Log Cryptosystems"
* Chae Hoon Lim, Pil Loong Lee,
@ -40,15 +40,15 @@ mp_dr_reduce (mp_int * a, mp_int * b, mp_digit mp)
return err;
}
}
/* alias for a->dp[i] */
tmpi = a->dp + k + k - 1;
/* for (i = 2k - 1; i >= k; i = i - 1)
/* for (i = 2k - 1; i >= k; i = i - 1)
*
* This is the main loop of the reduction. Note that at the end
* the words above position k are not zeroed as expected. The end
* result is that the digits from 0 to k-1 are the residue. So
* result is that the digits from 0 to k-1 are the residue. So
* we have to clear those afterwards.
*/
for (i = k + k - 1; i >= k; i = i - 1) {
@ -57,10 +57,10 @@ mp_dr_reduce (mp_int * a, mp_int * b, mp_digit mp)
/* x[i] * mp */
r = ((mp_word) *tmpi--) * ((mp_word) mp);
/* now add r to x[i-1:i-k]
/* now add r to x[i-1:i-k]
*
* First add it to the first digit x[i-k] then form the carry
* then enter the main loop
* then enter the main loop
*/
j = i - k;
@ -74,14 +74,14 @@ mp_dr_reduce (mp_int * a, mp_int * b, mp_digit mp)
mu = (r >> ((mp_word) DIGIT_BIT)) + (*tmpj >> DIGIT_BIT);
/* clear carry from a->dp[j] */
*tmpj++ &= MP_MASK;
*tmpj++ &= MP_MASK;
/* now add rest of the digits
*
/* now add rest of the digits
*
* Note this is basically a simple single digit addition to
* a larger multiple digit number. This is optimized somewhat
* because the propagation of carries is not likely to move
* more than a few digits.
* more than a few digits.
*
*/
for (++j; mu != 0 && j <= (i - 1); ++j) {
@ -99,16 +99,16 @@ mp_dr_reduce (mp_int * a, mp_int * b, mp_digit mp)
*tmpj += mp;
mu = *tmpj >> DIGIT_BIT;
*tmpj++ &= MP_MASK;
/* now handle carries */
for (++j; mu != 0 && j <= (i - 1); j++) {
*tmpj += mu;
mu = *tmpj >> DIGIT_BIT;
*tmpj++ &= MP_MASK;
*tmpj += mu;
mu = *tmpj >> DIGIT_BIT;
*tmpj++ &= MP_MASK;
}
}
}
/* zero words above k */
tmpi = a->dp + k;
for (i = k; i < a->used; i++) {
@ -117,34 +117,13 @@ mp_dr_reduce (mp_int * a, mp_int * b, mp_digit mp)
/* clamp, sub and return */
mp_clamp (a);
/* if a >= b [b == modulus] then subtract the modulus to fix up */
if (mp_cmp_mag (a, b) != MP_LT) {
return s_mp_sub (a, b, a);
}
return MP_OKAY;
}
/* determines if a number is a valid DR modulus */
int mp_dr_is_modulus(mp_int *a)
{
int ix;
/* must be at least two digits */
if (a->used < 2) {
return 0;
}
for (ix = 1; ix < a->used; ix++) {
if (a->dp[ix] != MP_MASK) {
return 0;
}
}
return 1;
}
/* determines the setup value */
void mp_dr_setup(mp_int *a, mp_digit *d)
{
*d = (1 << DIGIT_BIT) - a->dp[0];
}

25
bn_mp_dr_setup.c Normal file
View File

@ -0,0 +1,25 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is a library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
/* Computes the diminished radix setup value for modulus "a".
 *
 * Stores 2**DIGIT_BIT minus the lowest digit of "a" into *d.
 */
void mp_dr_setup(mp_int *a, mp_digit *d)
{
   /* work in mp_word: the casts are required if DIGIT_BIT is one less
    * than the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
    */
   mp_word radix = ((mp_word)1) << ((mp_word)DIGIT_BIT);
   mp_word low   = (mp_word)a->dp[0];

   *d = (mp_digit)(radix - low);
}

View File

@ -35,11 +35,11 @@ mp_expt_d (mp_int * a, mp_digit b, mp_int * c)
return res;
}
/* if the bit is set multiply */
if ((b & (mp_digit) (1 << (DIGIT_BIT - 1))) != 0) {
/* if the bit is set multiply */
if ((b & (mp_digit) (((mp_digit)1) << (DIGIT_BIT - 1))) != 0) {
if ((res = mp_mul (c, &g, c)) != MP_OKAY) {
mp_clear (&g);
return res;
mp_clear (&g);
return res;
}
}

View File

@ -17,7 +17,7 @@
static int f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y);
/* this is a shell function that calls either the normal or Montgomery
* exptmod functions. Originally the call to the montgomery code was
* exptmod functions. Originally the call to the montgomery code was
* embedded in the normal function but that wasted a lot of stack space
* for nothing (since 99% of the time the Montgomery code would be called)
*/
@ -25,10 +25,46 @@ int
mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
{
int dr;
/* modulus P must be positive */
if (P->sign == MP_NEG) {
return MP_VAL;
}
/* if exponent X is negative we have to recurse */
if (X->sign == MP_NEG) {
mp_int tmpG, tmpX;
int err;
/* first compute 1/G mod P */
if ((err = mp_init(&tmpG)) != MP_OKAY) {
return err;
}
if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
mp_clear(&tmpG);
return err;
}
/* now get |X| */
if ((err = mp_init(&tmpX)) != MP_OKAY) {
mp_clear(&tmpG);
return err;
}
if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
mp_clear_multi(&tmpG, &tmpX, NULL);
return err;
}
/* and now compute (1/G)^|X| instead of G^X [X < 0] */
err = mp_exptmod(&tmpG, &tmpX, P, Y);
mp_clear_multi(&tmpG, &tmpX, NULL);
return err;
}
dr = mp_dr_is_modulus(P);
/* if the modulus is odd use the fast method */
if (((mp_isodd (P) == 1 && P->used < MONTGOMERY_EXPT_CUTOFF) || dr == 1) && P->used > 4) {
if ((mp_isodd (P) == 1 || dr == 1) && P->used > 4) {
return mp_exptmod_fast (G, X, P, Y, dr);
} else {
return f_mp_exptmod (G, X, P, Y);
@ -60,11 +96,17 @@ f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
winsize = 8;
}
#ifdef MP_LOW_MEM
if (winsize > 5) {
winsize = 5;
}
#endif
/* init G array */
for (x = 0; x < (1 << winsize); x++) {
if ((err = mp_init_size (&M[x], 1)) != MP_OKAY) {
for (y = 0; y < x; y++) {
mp_clear (&M[y]);
mp_clear (&M[y]);
}
return err;
}
@ -78,7 +120,7 @@ f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
goto __MU;
}
/* create M table
/* create M table
*
* The M table contains powers of the input base, e.g. M[x] = G^x mod P
*
@ -119,30 +161,29 @@ f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
mp_set (&res, 1);
/* set initial mode and bit cnt */
mode = 0;
bitcnt = 0;
buf = 0;
mode = 0;
bitcnt = 1;
buf = 0;
digidx = X->used - 1;
bitcpy = bitbuf = 0;
bitcnt = 1;
for (;;) {
/* grab next digit as required */
if (--bitcnt == 0) {
if (digidx == -1) {
break;
break;
}
buf = X->dp[digidx--];
bitcnt = (int) DIGIT_BIT;
}
/* grab the next msb from the exponent */
y = (buf >> (DIGIT_BIT - 1)) & 1;
buf <<= 1;
y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
buf <<= (mp_digit)1;
/* if the bit is zero and mode == 0 then we ignore it
/* if the bit is zero and mode == 0 then we ignore it
* These represent the leading zero bits before the first 1 bit
* in the exponent. Technically this opt is not required but it
* in the exponent. Technically this opt is not required but it
* does lower the # of trivial squaring/reductions used
*/
if (mode == 0 && y == 0)
@ -151,10 +192,10 @@ f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
/* if the bit is zero and mode == 1 then we square */
if (mode == 1 && y == 0) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
goto __RES;
}
continue;
}
@ -167,20 +208,20 @@ f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
/* ok window is filled so square as required and multiply */
/* square first */
for (x = 0; x < winsize; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
}
/* then multiply */
if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
goto __MU;
goto __MU;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __MU;
goto __MU;
}
/* empty window and reset */
@ -194,21 +235,21 @@ f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
/* square then multiply if the bit is set */
for (x = 0; x < bitcpy; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
goto __RES;
}
bitbuf <<= 1;
if ((bitbuf & (1 << winsize)) != 0) {
/* then multiply */
if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
/* then multiply */
if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
}
}
}
}

View File

@ -19,7 +19,7 @@
* Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
* The value of k changes based on the size of the exponent.
*
* Uses Montgomery or Diminished Radix reduction [whichever appropriate]
* Uses Montgomery or Diminished Radix reduction [whichever appropriate]
*/
int
mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
@ -28,7 +28,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
mp_digit buf, mp;
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
int (*redux)(mp_int*,mp_int*,mp_digit);
/* find window size */
x = mp_count_bits (X);
if (x <= 7) {
@ -47,22 +47,37 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
winsize = 8;
}
#ifdef MP_LOW_MEM
if (winsize > 5) {
winsize = 5;
}
#endif
/* init G array */
for (x = 0; x < (1 << winsize); x++) {
if ((err = mp_init (&M[x])) != MP_OKAY) {
for (y = 0; y < x; y++) {
mp_clear (&M[y]);
mp_clear (&M[y]);
}
return err;
}
}
if (redmode == 0) {
/* now setup montgomery */
if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
goto __M;
}
redux = mp_montgomery_reduce;
/* automatically pick the comba one if available (saves quite a few calls/ifs) */
if ( ((P->used * 2 + 1) < MP_WARRAY) &&
P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
redux = fast_mp_montgomery_reduce;
} else {
/* use slower baseline method */
redux = mp_montgomery_reduce;
}
} else {
/* setup DR reduction */
mp_dr_setup(P, &mp);
@ -97,7 +112,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
goto __RES;
}
}
/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
goto __RES;
@ -123,42 +138,42 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
}
/* set initial mode and bit cnt */
mode = 0;
bitcnt = 0;
buf = 0;
mode = 0;
bitcnt = 1;
buf = 0;
digidx = X->used - 1;
bitcpy = bitbuf = 0;
bitcnt = 1;
for (;;) {
/* grab next digit as required */
if (--bitcnt == 0) {
if (digidx == -1) {
break;
break;
}
buf = X->dp[digidx--];
bitcnt = (int) DIGIT_BIT;
}
/* grab the next msb from the exponent */
y = (buf >> (DIGIT_BIT - 1)) & 1;
buf <<= 1;
y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
buf <<= (mp_digit)1;
/* if the bit is zero and mode == 0 then we ignore it
* These represent the leading zero bits before the first 1 bit
* in the exponent. Technically this opt is not required but it
* does lower the # of trivial squaring/reductions used
*/
if (mode == 0 && y == 0)
if (mode == 0 && y == 0) {
continue;
}
/* if the bit is zero and mode == 1 then we square */
if (mode == 1 && y == 0) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
goto __RES;
}
if ((err = redux (&res, P, mp)) != MP_OKAY) {
goto __RES;
goto __RES;
}
continue;
}
@ -171,20 +186,20 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
/* ok window is filled so square as required and multiply */
/* square first */
for (x = 0; x < winsize; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = redux (&res, P, mp)) != MP_OKAY) {
goto __RES;
}
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
}
if ((err = redux (&res, P, mp)) != MP_OKAY) {
goto __RES;
}
}
/* then multiply */
if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
goto __RES;
goto __RES;
}
if ((err = redux (&res, P, mp)) != MP_OKAY) {
goto __RES;
goto __RES;
}
/* empty window and reset */
@ -198,21 +213,21 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
/* square then multiply if the bit is set */
for (x = 0; x < bitcpy; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
goto __RES;
}
if ((err = redux (&res, P, mp)) != MP_OKAY) {
goto __RES;
goto __RES;
}
bitbuf <<= 1;
if ((bitbuf & (1 << winsize)) != 0) {
/* then multiply */
if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
goto __RES;
}
if ((err = redux (&res, P, mp)) != MP_OKAY) {
goto __RES;
}
/* then multiply */
if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
goto __RES;
}
if ((err = redux (&res, P, mp)) != MP_OKAY) {
goto __RES;
}
}
}
}
@ -222,7 +237,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
if ((err = mp_montgomery_reduce (&res, P, mp)) != MP_OKAY) {
goto __RES;
}
}
}
mp_exch (&res, Y);
err = MP_OKAY;

View File

@ -82,18 +82,18 @@ mp_gcd (mp_int * a, mp_int * b, mp_int * c)
/* B3 (and B4). Halve t, if even */
while (t.used != 0 && mp_iseven(&t) == 1) {
if ((res = mp_div_2 (&t, &t)) != MP_OKAY) {
goto __T;
goto __T;
}
}
/* B5. if t>0 then u=t otherwise v=-t */
if (t.used != 0 && t.sign != MP_NEG) {
if ((res = mp_copy (&t, &u)) != MP_OKAY) {
goto __T;
goto __T;
}
} else {
if ((res = mp_copy (&t, &v)) != MP_OKAY) {
goto __T;
goto __T;
}
v.sign = (v.sign == MP_ZPOS) ? MP_NEG : MP_ZPOS;
}
@ -102,9 +102,9 @@ mp_gcd (mp_int * a, mp_int * b, mp_int * c)
if ((res = mp_sub (&u, &v, &t)) != MP_OKAY) {
goto __T;
}
}
while (t.used != 0);
} while (mp_iszero(&t) == 0);
/* multiply by 2^k which we divided out at the beginning */
if ((res = mp_mul_2d (&u, k, &u)) != MP_OKAY) {
goto __T;
}

View File

@ -18,12 +18,12 @@
int
mp_grow (mp_int * a, int size)
{
int i, n;
int i;
/* if the alloc size is smaller alloc more ram */
if (a->alloc < size) {
/* ensure there are always at least MP_PREC digits extra on top */
size += (MP_PREC * 2) - (size & (MP_PREC - 1));
size += (MP_PREC * 2) - (size & (MP_PREC - 1));
a->dp = OPT_CAST realloc (a->dp, sizeof (mp_digit) * size);
if (a->dp == NULL) {
@ -31,9 +31,9 @@ mp_grow (mp_int * a, int size)
}
/* zero excess digits */
n = a->alloc;
i = a->alloc;
a->alloc = size;
for (i = n; i < a->alloc; i++) {
for (; i < a->alloc; i++) {
a->dp[i] = 0;
}
}

View File

@ -18,7 +18,6 @@
int
mp_init (mp_int * a)
{
/* allocate ram required and clear it */
a->dp = OPT_CAST calloc (sizeof (mp_digit), MP_PREC);
if (a->dp == NULL) {

View File

@ -29,63 +29,36 @@ mp_invmod (mp_int * a, mp_int * b, mp_int * c)
if (mp_iseven (b) == 0) {
return fast_mp_invmod (a, b, c);
}
if ((res = mp_init (&x)) != MP_OKAY) {
goto __ERR;
}
if ((res = mp_init (&y)) != MP_OKAY) {
goto __X;
}
if ((res = mp_init (&u)) != MP_OKAY) {
goto __Y;
}
if ((res = mp_init (&v)) != MP_OKAY) {
goto __U;
}
if ((res = mp_init (&A)) != MP_OKAY) {
goto __V;
}
if ((res = mp_init (&B)) != MP_OKAY) {
goto __A;
}
if ((res = mp_init (&C)) != MP_OKAY) {
goto __B;
}
if ((res = mp_init (&D)) != MP_OKAY) {
goto __C;
/* init temps */
if ((res = mp_init_multi(&x, &y, &u, &v, &A, &B, &C, &D, NULL)) != MP_OKAY) {
return res;
}
/* x = a, y = b */
if ((res = mp_copy (a, &x)) != MP_OKAY) {
goto __D;
goto __ERR;
}
if ((res = mp_copy (b, &y)) != MP_OKAY) {
goto __D;
goto __ERR;
}
if ((res = mp_abs (&x, &x)) != MP_OKAY) {
goto __D;
goto __ERR;
}
/* 2. [modified] if x,y are both even then return an error! */
if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
res = MP_VAL;
goto __D;
goto __ERR;
}
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
if ((res = mp_copy (&x, &u)) != MP_OKAY) {
goto __D;
goto __ERR;
}
if ((res = mp_copy (&y, &v)) != MP_OKAY) {
goto __D;
goto __ERR;
}
mp_set (&A, 1);
mp_set (&D, 1);
@ -96,24 +69,24 @@ top:
while (mp_iseven (&u) == 1) {
/* 4.1 u = u/2 */
if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
goto __D;
goto __ERR;
}
/* 4.2 if A or B is odd then */
if (mp_iseven (&A) == 0 || mp_iseven (&B) == 0) {
/* A = (A+y)/2, B = (B-x)/2 */
if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
goto __D;
goto __ERR;
}
if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
goto __D;
goto __ERR;
}
}
/* A = A/2, B = B/2 */
if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
goto __D;
goto __ERR;
}
if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
goto __D;
goto __ERR;
}
}
@ -122,24 +95,24 @@ top:
while (mp_iseven (&v) == 1) {
/* 5.1 v = v/2 */
if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
goto __D;
goto __ERR;
}
/* 5.2 if C or D is odd then */
if (mp_iseven (&C) == 0 || mp_iseven (&D) == 0) {
/* C = (C+y)/2, D = (D-x)/2 */
if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
goto __D;
goto __ERR;
}
if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
goto __D;
goto __ERR;
}
}
/* C = C/2, D = D/2 */
if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
goto __D;
goto __ERR;
}
if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
goto __D;
goto __ERR;
}
}
@ -147,28 +120,28 @@ top:
if (mp_cmp (&u, &v) != MP_LT) {
/* u = u - v, A = A - C, B = B - D */
if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
goto __D;
goto __ERR;
}
if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
goto __D;
goto __ERR;
}
if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
goto __D;
goto __ERR;
}
} else {
/* v = v - u, C = C - A, D = D - B */
if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
goto __D;
goto __ERR;
}
if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
goto __D;
goto __ERR;
}
if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
goto __D;
goto __ERR;
}
}
@ -181,21 +154,13 @@ top:
/* if v != 1 then there is no inverse */
if (mp_cmp_d (&v, 1) != MP_EQ) {
res = MP_VAL;
goto __D;
goto __ERR;
}
/* C is now the inverse */
mp_exch (&C, c);
res = MP_OKAY;
__D:mp_clear (&D);
__C:mp_clear (&C);
__B:mp_clear (&B);
__A:mp_clear (&A);
__V:mp_clear (&v);
__U:mp_clear (&u);
__Y:mp_clear (&y);
__X:mp_clear (&x);
__ERR:
__ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
return res;
}

View File

@ -14,7 +14,7 @@
*/
#include <tommath.h>
/* computes the jacobi c = (a | n) (or Legendre if b is prime)
/* computes the jacobi c = (a | n) (or Legendre if n is prime)
* HAC pp. 73 Algorithm 2.149
*/
int

View File

@ -36,7 +36,7 @@
int
mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
{
mp_int x0, x1, y0, y1, t1, t2, x0y0, x1y1;
mp_int x0, x1, y0, y1, t1, x0y0, x1y1;
int B, err;
err = MP_MEM;
@ -60,10 +60,8 @@ mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
/* init temps */
if (mp_init_size (&t1, B * 2) != MP_OKAY)
goto Y1;
if (mp_init_size (&t2, B * 2) != MP_OKAY)
goto T1;
if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
goto T2;
goto T1;
if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
goto X0Y0;
@ -110,41 +108,40 @@ mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
mp_clamp (&y0);
/* now calc the products x0y0 and x1y1 */
if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
goto X1Y1; /* x0y0 = x0*y0 */
if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) /* after this x0 is no longer required, free temp [x0==t2]! */
goto X1Y1; /* x0y0 = x0*y0 */
if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
goto X1Y1; /* x1y1 = x1*y1 */
goto X1Y1; /* x1y1 = x1*y1 */
/* now calc x1-x0 and y1-y0 */
if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
goto X1Y1; /* t1 = x1 - x0 */
if (mp_sub (&y1, &y0, &t2) != MP_OKAY)
goto X1Y1; /* t2 = y1 - y0 */
if (mp_mul (&t1, &t2, &t1) != MP_OKAY)
goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */
goto X1Y1; /* t1 = x1 - x0 */
if (mp_sub (&y1, &y0, &x0) != MP_OKAY)
goto X1Y1; /* t2 = y1 - y0 */
if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */
/* add x0y0 */
if (mp_add (&x0y0, &x1y1, &t2) != MP_OKAY)
goto X1Y1; /* t2 = x0y0 + x1y1 */
if (mp_sub (&t2, &t1, &t1) != MP_OKAY)
goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
goto X1Y1; /* t2 = x0y0 + x1y1 */
if (mp_sub (&x0, &t1, &t1) != MP_OKAY)
goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
/* shift by B */
if (mp_lshd (&t1, B) != MP_OKAY)
goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
goto X1Y1; /* x1y1 = x1y1 << 2*B */
goto X1Y1; /* x1y1 = x1y1 << 2*B */
if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
goto X1Y1; /* t1 = x0y0 + t1 */
goto X1Y1; /* t1 = x0y0 + t1 */
if (mp_add (&t1, &x1y1, c) != MP_OKAY)
goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
err = MP_OKAY;
X1Y1:mp_clear (&x1y1);
X0Y0:mp_clear (&x0y0);
T2:mp_clear (&t2);
T1:mp_clear (&t1);
Y1:mp_clear (&y1);
Y0:mp_clear (&y0);

View File

@ -74,32 +74,32 @@ mp_karatsuba_sqr (mp_int * a, mp_int * b)
/* now calc the products x0*x0 and x1*x1 */
if (mp_sqr (&x0, &x0x0) != MP_OKAY)
goto X1X1; /* x0x0 = x0*x0 */
goto X1X1; /* x0x0 = x0*x0 */
if (mp_sqr (&x1, &x1x1) != MP_OKAY)
goto X1X1; /* x1x1 = x1*x1 */
goto X1X1; /* x1x1 = x1*x1 */
/* now calc x1-x0 and y1-y0 */
/* now calc (x1-x0)^2 */
if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
goto X1X1; /* t1 = x1 - x0 */
goto X1X1; /* t1 = x1 - x0 */
if (mp_sqr (&t1, &t1) != MP_OKAY)
goto X1X1; /* t1 = (x1 - x0) * (y1 - y0) */
goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
/* add x0y0 */
if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
goto X1X1; /* t2 = x0y0 + x1y1 */
goto X1X1; /* t2 = x0y0 + x1y1 */
if (mp_sub (&t2, &t1, &t1) != MP_OKAY)
goto X1X1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
goto X1X1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
/* shift by B */
if (mp_lshd (&t1, B) != MP_OKAY)
goto X1X1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
goto X1X1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
goto X1X1; /* x1y1 = x1y1 << 2*B */
goto X1X1; /* x1y1 = x1y1 << 2*B */
if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
goto X1X1; /* t1 = x0y0 + t1 */
goto X1X1; /* t1 = x0y0 + t1 */
if (mp_add (&t1, &x1x1, b) != MP_OKAY)
goto X1X1; /* t1 = x0y0 + t1 + x1y1 */
goto X1X1; /* t1 = x0y0 + t1 + x1y1 */
err = MP_OKAY;

View File

@ -20,15 +20,16 @@ mp_lshd (mp_int * a, int b)
{
int x, res;
/* if its less than zero return */
if (b <= 0) {
return MP_OKAY;
}
/* grow to fit the new digits */
if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
return res;
if (a->alloc < a->used + b) {
if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
return res;
}
}
{

View File

@ -15,10 +15,10 @@
#include <tommath.h>
/* calculates a = B^n mod b for Montgomery reduction
* Where B is the base [e.g. 2^DIGIT_BIT].
* Where B is the base [e.g. 2^DIGIT_BIT].
* B^n mod b is computed by first computing
* A = B^(n-1) which doesn't require a reduction but a simple OR.
* then C = A * B = B^n is computed by performing upto DIGIT_BIT
* then C = A * B = B^n is computed by performing upto DIGIT_BIT
* shifts with subtractions when the result is greater than b.
*
* The method is slightly modified to shift B unconditionally upto just under
@ -38,13 +38,13 @@ mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
}
/* now compute C = A * B mod b */
for (x = bits - 1; x < DIGIT_BIT; x++) {
for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
return res;
}
if (mp_cmp_mag (a, b) != MP_LT) {
if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
return res;
return res;
}
}
}

View File

@ -21,12 +21,19 @@ mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
int ix, res, digs;
mp_digit ui;
/* can the fast reduction [comba] method be used?
*
* Note that unlike in mp_mul you're safely allowed *less*
* than the available columns [255 per default] since carries
* are fixed up in the inner loop.
*/
digs = m->used * 2 + 1;
if ((digs < 512)
&& digs < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
if ((digs < MP_WARRAY)
&& m->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
return fast_mp_montgomery_reduce (a, m, mp);
}
/* grow the input as required */
if (a->alloc < m->used * 2 + 1) {
if ((res = mp_grow (a, m->used * 2 + 1)) != MP_OKAY) {
return res;
@ -50,15 +57,15 @@ mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp)
mu = 0;
for (iy = 0; iy < m->used; iy++) {
r = ((mp_word) ui) * ((mp_word) * tmpx++) + ((mp_word) mu) + ((mp_word) * tmpy);
mu = (r >> ((mp_word) DIGIT_BIT));
*tmpy++ = (r & ((mp_word) MP_MASK));
r = ((mp_word) ui) * ((mp_word) * tmpx++) + ((mp_word) mu) + ((mp_word) * tmpy);
mu = (r >> ((mp_word) DIGIT_BIT));
*tmpy++ = (r & ((mp_word) MP_MASK));
}
/* propagate carries */
while (mu) {
*tmpy += mu;
mu = (*tmpy >> DIGIT_BIT) & 1;
*tmpy++ &= MP_MASK;
*tmpy += mu;
mu = (*tmpy >> DIGIT_BIT) & 1;
*tmpy++ &= MP_MASK;
}
}
}

View File

@ -18,11 +18,11 @@
int
mp_montgomery_setup (mp_int * a, mp_digit * mp)
{
unsigned long x, b;
mp_digit x, b;
/* fast inversion mod 2^32
/* fast inversion mod 2^k
*
* Based on the fact that
* Based on the fact that
*
* XA = 1 (mod 2^n) => (X(2-XA)) A = 1 (mod 2^2n)
* => 2*X*A - X*X*A*A = 1
@ -34,13 +34,20 @@ mp_montgomery_setup (mp_int * a, mp_digit * mp)
return MP_VAL;
}
x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2^4 */
x *= 2 - b * x; /* here x*a==1 mod 2^8 */
x *= 2 - b * x; /* here x*a==1 mod 2^16; each step doubles the nb of bits */
x *= 2 - b * x; /* here x*a==1 mod 2^32 */
x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2^4 */
x *= 2 - b * x; /* here x*a==1 mod 2^8 */
#if !defined(MP_8BIT)
x *= 2 - b * x; /* here x*a==1 mod 2^16; each step doubles the nb of bits */
#endif
#if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
x *= 2 - b * x; /* here x*a==1 mod 2^32 */
#endif
#ifdef MP_64BIT
x *= 2 - b * x; /* here x*a==1 mod 2^64 */
#endif
/* t = -1/m mod b */
*mp = ((mp_digit) 1 << ((mp_digit) DIGIT_BIT)) - (x & MP_MASK);
*mp = (((mp_digit) 1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK;
return MP_OKAY;
}

View File

@ -24,15 +24,15 @@ mp_mul (mp_int * a, mp_int * b, mp_int * c)
res = mp_karatsuba_mul (a, b, c);
} else {
/* can we use the fast multiplier?
/* can we use the fast multiplier?
*
* The fast multiplier can be used if the output will have less than
* 512 digits and the number of digits won't affect carry propagation
* The fast multiplier can be used if the output will have less than
* MP_WARRAY digits and the number of digits won't affect carry propagation
*/
int digs = a->used + b->used + 1;
if ((digs < 512)
&& digs < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
if ((digs < MP_WARRAY)
&& MIN(a->used, b->used) <= (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
res = fast_s_mp_mul_digs (a, b, c, digs);
} else {
res = s_mp_mul (a, b, c);

View File

@ -20,10 +20,9 @@ mp_mul_2 (mp_int * a, mp_int * b)
{
int x, res, oldused;
/* Optimization: should copy and shift at the same time */
if (b->alloc < a->used) {
if ((res = mp_grow (b, a->used)) != MP_OKAY) {
/* grow to accommodate result */
if (b->alloc < a->used + 1) {
if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
return res;
}
}
@ -31,7 +30,6 @@ mp_mul_2 (mp_int * a, mp_int * b)
oldused = b->used;
b->used = a->used;
/* shift any bit count < DIGIT_BIT */
{
register mp_digit r, rr, *tmpa, *tmpb;
@ -43,37 +41,32 @@ mp_mul_2 (mp_int * a, mp_int * b)
/* carry */
r = 0;
for (x = 0; x < b->used; x++) {
for (x = 0; x < a->used; x++) {
/* get what will be the *next* carry bit from the MSB of the current digit */
rr = *tmpa >> (DIGIT_BIT - 1);
/* get what will be the *next* carry bit from the
* MSB of the current digit
*/
rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
/* now shift up this digit, add in the carry [from the previous] */
*tmpb++ = ((*tmpa++ << 1) | r) & MP_MASK;
*tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
/* copy the carry that would be from the source digit into the next iteration */
/* copy the carry that would be from the source
* digit into the next iteration
*/
r = rr;
}
/* new leading digit? */
if (r != 0) {
/* do we have to grow to accomodate the new digit? */
if (b->alloc == b->used) {
if ((res = mp_grow (b, b->used + 1)) != MP_OKAY) {
return res;
}
/* after the grow *tmpb is no longer valid so we have to reset it!
* (this bug took me about 17 minutes to find...!)
*/
tmpb = b->dp + b->used;
}
/* add a MSB which is always 1 at this point */
*tmpb = 1;
++b->used;
}
/* now zero any excess digits on the destination that we didn't write to */
/* now zero any excess digits on the destination
* that we didn't write to
*/
tmpb = b->dp + b->used;
for (x = b->used; x < oldused; x++) {
*tmpb++ = 0;

View File

@ -14,24 +14,34 @@
*/
#include <tommath.h>
/* NOTE: This routine requires updating. For instance the c->used = c->alloc bit
is wrong. We should just shift c->used digits then set the carry as c->dp[c->used] = carry
To be fixed for LTM 0.18
*/
/* shift left by a certain bit count */
int
mp_mul_2d (mp_int * a, int b, mp_int * c)
{
mp_digit d, r, rr;
int x, res;
mp_digit d;
int res;
/* copy */
if ((res = mp_copy (a, c)) != MP_OKAY) {
return res;
if (a != c) {
if ((res = mp_copy (a, c)) != MP_OKAY) {
return res;
}
}
if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
return res;
if (c->alloc < (int)(c->used + b/DIGIT_BIT + 2)) {
if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 2)) != MP_OKAY) {
return res;
}
}
/* shift by as many digits in the bit count */
if (b >= DIGIT_BIT) {
if (b >= (int)DIGIT_BIT) {
if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
return res;
}
@ -41,14 +51,15 @@ mp_mul_2d (mp_int * a, int b, mp_int * c)
/* shift any bit count < DIGIT_BIT */
d = (mp_digit) (b % DIGIT_BIT);
if (d != 0) {
register mp_digit *tmpc, mask;
register mp_digit *tmpc, mask, r, rr;
register int x;
/* bitmask for carries */
mask = (1U << d) - 1U;
mask = (((mp_digit)1) << d) - 1;
/* alias */
tmpc = c->dp;
/* carry */
r = 0;
for (x = 0; x < c->used; x++) {

View File

@ -20,6 +20,7 @@ mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
{
int res, pa, olduse;
/* make sure c is big enough to hold a*b */
pa = a->used;
if (c->alloc < pa + 1) {
if ((res = mp_grow (c, pa + 1)) != MP_OKAY) {
@ -27,7 +28,10 @@ mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
}
}
/* get the original destinations used count */
olduse = c->used;
/* set the new temporary used count */
c->used = pa + 1;
{
@ -35,21 +39,31 @@ mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
register mp_word r;
register int ix;
tmpc = c->dp + c->used;
for (ix = c->used; ix < olduse; ix++) {
*tmpc++ = 0;
}
/* alias for a->dp [source] */
tmpa = a->dp;
/* alias for c->dp [dest] */
tmpc = c->dp;
/* zero carry */
u = 0;
for (ix = 0; ix < pa; ix++) {
/* compute product and carry sum for this term */
r = ((mp_word) u) + ((mp_word) * tmpa++) * ((mp_word) b);
/* mask off higher bits to get a single digit */
*tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
/* send carry into next iteration */
u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
}
*tmpc = u;
/* store final carry [if any] */
*tmpc++ = u;
/* now zero digits above the top */
for (; pa < olduse; pa++) {
*tmpc++ = 0;
}
}
mp_clamp (c);

64
bn_mp_multi.c Normal file
View File

@ -0,0 +1,64 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
#include <tommath.h>
#include <stdarg.h>
/* Initialise a NULL-terminated list of mp_ints in a single call.
 *
 * Every argument (starting with mp) is handed to mp_init() in order until
 * a NULL pointer is reached.  If one of the initialisations fails, all of
 * the mp_ints that were already initialised are cleared again, so the
 * caller never has to clean up a partially initialised set.
 *
 * Returns MP_OKAY when every mp_init() succeeded, MP_MEM otherwise.
 */
int mp_init_multi(mp_int *mp, ...)
{
    mp_err res = MP_OKAY;      /* Assume ok until proven otherwise */
    int n = 0;                 /* Number of ok inits */
    mp_int* cur_arg = mp;
    va_list args;

    va_start(args, mp);        /* init args to next argument from caller */
    while (cur_arg != NULL) {
        if (mp_init(cur_arg) != MP_OKAY) {
            /* Oops - error! Back-track and mp_clear what we already
               succeeded in init-ing, then report the error.

               `args` is intentionally NOT ended here: it is ended exactly
               once after the loop.  Calling va_end twice on the same
               va_list without an intervening va_start is undefined.
            */
            va_list clean_args;

            /* walk the argument list again from the start */
            cur_arg = mp;
            va_start(clean_args, mp);
            while (n--) {
                mp_clear(cur_arg);
                cur_arg = va_arg(clean_args, mp_int*);
            }
            va_end(clean_args);
            res = MP_MEM;
            break;
        }
        n++;
        cur_arg = va_arg(args, mp_int*);
    }
    va_end(args);
    return res;                /* MP_OKAY unless an init failed above */
}
/* Clear a NULL-terminated list of mp_ints in a single call.
 *
 * mp_clear() is applied to mp and then to each following argument, in
 * order, until a NULL pointer is encountered.
 */
void mp_clear_multi(mp_int *mp, ...)
{
    mp_int *cur;
    va_list ap;

    va_start(ap, mp);
    for (cur = mp; cur != NULL; cur = va_arg(ap, mp_int*)) {
        mp_clear(cur);
    }
    va_end(ap);
}

View File

@ -14,7 +14,7 @@
*/
#include <tommath.h>
/* determines if an integers is divisible by one of the first 256 primes or not
/* determines if an integers is divisible by one of the first 256 primes or not
*
* sets result to 0 if not, 1 if yes
*/
@ -27,7 +27,7 @@ mp_prime_is_divisible (mp_int * a, int *result)
/* default to not */
*result = 0;
for (ix = 0; ix < 256; ix++) {
for (ix = 0; ix < PRIME_SIZE; ix++) {
/* is it equal to the prime? */
if (mp_cmp_d (a, __prime_tab[ix]) == MP_EQ) {
*result = 1;

View File

@ -31,10 +31,18 @@ mp_prime_is_prime (mp_int * a, int t, int *result)
*result = 0;
/* valid value of t? */
if (t < 1 || t > 256) {
if (t < 1 || t > PRIME_SIZE) {
return MP_VAL;
}
/* is the input equal to one of the primes in the table? */
for (ix = 0; ix < PRIME_SIZE; ix++) {
if (mp_cmp_d(a, __prime_tab[ix]) == MP_EQ) {
*result = 1;
return MP_OKAY;
}
}
/* first perform trial division */
if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
return err;

View File

@ -20,35 +20,35 @@
int mp_prime_next_prime(mp_int *a, int t)
{
int err, res;
if (mp_iseven(a) == 1) {
/* force odd */
if ((err = mp_add_d(a, 1, a)) != MP_OKAY) {
return err;
}
} else {
/* force to next number */
/* force to next odd number */
if ((err = mp_add_d(a, 2, a)) != MP_OKAY) {
return err;
}
}
}
for (;;) {
/* is this prime? */
if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) {
return err;
}
if (res == 1) {
break;
}
/* add two, next candidate */
if ((err = mp_add_d(a, 2, a)) != MP_OKAY) {
return err;
}
}
return MP_OKAY;
}

View File

@ -21,8 +21,7 @@ int
mp_reduce_setup (mp_int * a, mp_int * b)
{
int res;
if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
return res;
}
@ -30,8 +29,8 @@ mp_reduce_setup (mp_int * a, mp_int * b)
return res;
}
/* reduces x mod m, assumes 0 < x < m^2, mu is precomputed via mp_reduce_setup
* From HAC pp.604 Algorithm 14.42
/* reduces x mod m, assumes 0 < x < m^2, mu is precomputed via mp_reduce_setup
* From HAC pp.604 Algorithm 14.42
*/
int
mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
@ -39,15 +38,15 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
mp_int q;
int res, um = m->used;
if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
return res;
}
mp_rshd (&q, um - 1); /* q1 = x / b^(k-1) */
/* q1 = x / b^(k-1) */
mp_rshd (&q, um - 1);
/* according to HAC this optimization is ok */
if (((unsigned long) m->used) > (1UL << (unsigned long) (DIGIT_BIT - 1UL))) {
if (((unsigned long) m->used) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
goto CLEANUP;
}
@ -57,7 +56,8 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
}
}
mp_rshd (&q, um + 1); /* q3 = q2 / b^(k+1) */
/* q3 = q2 / b^(k+1) */
mp_rshd (&q, um + 1);
/* x = x mod b^(k+1), quick (no division) */
if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
@ -70,8 +70,9 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
}
/* x = x - q */
if ((res = mp_sub (x, &q, x)) != MP_OKAY)
if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
goto CLEANUP;
}
/* If x < 0, add b^(k+1) to it */
if (mp_cmp_d (x, 0) == MP_LT) {
@ -84,8 +85,9 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
/* Back off if it's too big */
while (mp_cmp (x, m) != MP_LT) {
if ((res = s_mp_sub (x, m, x)) != MP_OKAY)
if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
break;
}
}
CLEANUP:

View File

@ -26,7 +26,7 @@ mp_rshd (mp_int * a, int b)
}
/* if b >= used then simply zero it and return */
if (a->used < b) {
if (a->used <= b) {
mp_zero (a);
return;
}
@ -42,8 +42,9 @@ mp_rshd (mp_int * a, int b)
/* offset into digits */
tmpaa = a->dp + b;
/* this is implemented as a sliding window where the window is b-digits long
* and digits from the top of the window are copied to the bottom
/* this is implemented as a sliding window where
* the window is b-digits long and digits from
* the top of the window are copied to the bottom
*
* e.g.

View File

@ -16,15 +16,13 @@
/* set a 32-bit const */
int
mp_set_int (mp_int * a, unsigned long b)
mp_set_int (mp_int * a, unsigned int b)
{
int x, res;
mp_zero (a);
/* set four bits at a time, simplest solution to the what if DIGIT_BIT==7 case */
/* set four bits at a time */
for (x = 0; x < 8; x++) {
/* shift the number up four bits */
if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
return res;
@ -37,9 +35,8 @@ mp_set_int (mp_int * a, unsigned long b)
b <<= 4;
/* ensure that digits are not clamped off */
a->used += 32 / DIGIT_BIT + 1;
a->used += 32 / DIGIT_BIT + 2;
}
mp_clamp (a);
return MP_OKAY;
}

View File

@ -24,8 +24,7 @@ mp_sqr (mp_int * a, mp_int * b)
} else {
/* can we use the fast multiplier? */
if (((a->used * 2 + 1) < 512)
&& a->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT) - 1))) {
if ((a->used * 2 + 1) < 512 && a->used < (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
res = fast_s_mp_sqr (a, b);
} else {
res = s_mp_sqr (a, b);

View File

@ -20,39 +20,34 @@ mp_sub (mp_int * a, mp_int * b, mp_int * c)
{
int sa, sb, res;
sa = a->sign;
sb = b->sign;
/* handle four cases */
if (sa == MP_ZPOS && sb == MP_ZPOS) {
/* both positive, a - b, but if b>a then we do -(b - a) */
if (mp_cmp_mag (a, b) == MP_LT) {
/* b>a */
res = s_mp_sub (b, a, c);
c->sign = MP_NEG;
} else {
res = s_mp_sub (a, b, c);
c->sign = MP_ZPOS;
}
} else if (sa == MP_ZPOS && sb == MP_NEG) {
/* a - -b == a + b */
if (sa != sb) {
/* subtract a negative from a positive, OR */
/* subtract a positive from a negative. */
/* In either case, ADD their magnitudes, */
/* and use the sign of the first number. */
c->sign = sa;
res = s_mp_add (a, b, c);
c->sign = MP_ZPOS;
} else if (sa == MP_NEG && sb == MP_ZPOS) {
/* -a - b == -(a + b) */
res = s_mp_add (a, b, c);
c->sign = MP_NEG;
} else {
/* -a - -b == b - a, but if a>b == -(a - b) */
if (mp_cmp_mag (a, b) == MP_GT) {
/* subtract a positive from a positive, OR */
/* subtract a negative from a negative. */
/* First, take the difference between their */
/* magnitudes, then... */
if (mp_cmp_mag (a, b) != MP_LT) {
/* Copy the sign from the first */
c->sign = sa;
/* The first has a larger or equal magnitude */
res = s_mp_sub (a, b, c);
c->sign = MP_NEG;
} else {
/* The result has the *opposite* sign from */
/* the first number. */
c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
/* The second has a larger magnitude */
res = s_mp_sub (b, a, c);
c->sign = MP_ZPOS;
}
}
return res;
}

View File

@ -17,7 +17,9 @@ const mp_digit __prime_tab[] = {
0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
#ifndef MP_8BIT
0x0083,
0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
@ -49,4 +51,5 @@ const mp_digit __prime_tab[] = {
0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
#endif
};

View File

@ -135,3 +135,80 @@ mp_radix_size (mp_int * a, int radix)
mp_clear (&t);
return digs + 1;
}
/* read a bigint from a file stream in ASCII
 *
 * Accepts an optional leading '-' followed by digits of the given radix.
 * Each character is matched exactly against the first `radix` entries of
 * s_rmap.  Reading stops at the first character that is not a digit of
 * that radix (or at EOF); that terminating character has already been
 * consumed from the stream and is not pushed back.  If no digit is read
 * the result is zero.
 *
 * Returns MP_OKAY on success, MP_VAL if the radix is out of range, or the
 * error code reported by mp_mul_d / mp_add_d.
 */
int mp_fread(mp_int *a, int radix, FILE *stream)
{
    int err, ch, neg, y;

    /* reject a radix the digit map cannot represent before indexing it.
     * NOTE(review): 64 assumes s_rmap holds 64 digit characters -- confirm
     * against its definition earlier in this file.
     */
    if (radix < 2 || radix > 64) {
        return MP_VAL;
    }

    /* clear a */
    mp_zero(a);

    /* if first digit is - then set negative */
    ch = fgetc(stream);
    if (ch == '-') {
        neg = MP_NEG;
        ch = fgetc(stream);
    } else {
        neg = MP_ZPOS;
    }

    for (;;) {
        /* find y in the radix map */
        for (y = 0; y < radix; y++) {
            if (s_rmap[y] == ch) {
                break;
            }
        }
        /* not a digit of this radix (this also covers EOF): stop reading */
        if (y == radix) {
            break;
        }

        /* shift up and add */
        if ((err = mp_mul_d(a, radix, a)) != MP_OKAY) {
            return err;
        }
        if ((err = mp_add_d(a, y, a)) != MP_OKAY) {
            return err;
        }

        ch = fgetc(stream);
    }

    /* only apply the sign to a non-zero value so zero is never negative */
    if (mp_cmp_d(a, 0) != MP_EQ) {
        a->sign = neg;
    }

    return MP_OKAY;
}
/* write a bigint to a file stream in ASCII
 *
 * The value is rendered with mp_toradix() into a temporary heap buffer
 * whose size comes from mp_radix_size(), and exactly that many characters
 * are then written to the stream one at a time.
 * NOTE(review): mp_radix_size() appears to count the terminating NUL, in
 * which case the NUL is written to the stream as well -- confirm.
 *
 * Returns MP_OKAY on success, MP_VAL if mp_radix_size() reports 0 or a
 * character cannot be written, MP_MEM if the buffer cannot be allocated,
 * or the error code reported by mp_toradix().
 */
int mp_fwrite(mp_int *a, int radix, FILE *stream)
{
    char *text;
    int status, size, pos;

    size = mp_radix_size(a, radix);
    if (size == 0) {
        return MP_VAL;
    }

    text = malloc(size);
    if (text == NULL) {
        return MP_MEM;
    }

    status = mp_toradix(a, text, radix);
    if (status == MP_OKAY) {
        for (pos = 0; pos < size; pos++) {
            if (fputc(text[pos], stream) == EOF) {
                status = MP_VAL;
                break;
            }
        }
    }

    /* single exit: the buffer is released on every path */
    free(text);
    return status;
}

View File

@ -24,7 +24,7 @@ bn_reverse (unsigned char *s, int len)
ix = 0;
iy = len - 1;
while (ix < iy) {
t = s[ix];
t = s[ix];
s[ix] = s[iy];
s[iy] = t;
++ix;

View File

@ -28,13 +28,10 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
min = b->used;
max = a->used;
x = a;
} else if (a->used < b->used) {
} else {
min = a->used;
max = b->used;
x = b;
} else {
min = max = a->used;
x = NULL;
}
/* init result */
@ -44,11 +41,10 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
}
}
/* get old used digit count and set new one */
olduse = c->used;
c->used = max + 1;
/* add digits from lower part */
/* set the carry to zero */
{
register mp_digit u, *tmpa, *tmpb, *tmpc;
@ -65,36 +61,39 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
/* destination */
tmpc = c->dp;
/* zero the carry */
u = 0;
for (i = 0; i < min; i++) {
/* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
*tmpc = *tmpa++ + *tmpb++ + u;
/* U = carry bit of T[i] */
u = *tmpc >> DIGIT_BIT;
u = *tmpc >> ((mp_digit)DIGIT_BIT);
/* take away carry bit from T[i] */
*tmpc++ &= MP_MASK;
}
/* now copy higher words if any, that is in A+B if A or B has more digits add those in */
/* now copy higher words if any, that is in A+B
* if A or B has more digits add those in
*/
if (min != max) {
for (; i < max; i++) {
/* T[i] = X[i] + U */
*tmpc = x->dp[i] + u;
/* T[i] = X[i] + U */
*tmpc = x->dp[i] + u;
/* U = carry bit of T[i] */
u = *tmpc >> DIGIT_BIT;
/* U = carry bit of T[i] */
u = *tmpc >> ((mp_digit)DIGIT_BIT);
/* take away carry bit from T[i] */
*tmpc++ &= MP_MASK;
/* take away carry bit from T[i] */
*tmpc++ &= MP_MASK;
}
}
/* add carry */
*tmpc++ = u;
/* clear digits above used (since we may not have grown result above) */
/* clear digits above oldused */
for (i = c->used; i < olduse; i++) {
*tmpc++ = 0;
}

View File

@ -15,8 +15,8 @@
#include <tommath.h>
/* multiplies |a| * |b| and only computes upto digs digits of result
* HAC pp. 595, Algorithm 14.12 Modified so you can control how many digits of
* output are created.
* HAC pp. 595, Algorithm 14.12 Modified so you can control how
* many digits of output are created.
*/
int
s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
@ -27,6 +27,13 @@ s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
mp_word r;
mp_digit tmpx, *tmpt, *tmpy;
/* can we use the fast multiplier? */
if (((digs) < MP_WARRAY) &&
MIN (a->used, b->used) <
(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
return fast_s_mp_mul_digs (a, b, c, digs);
}
if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
return res;
}
@ -42,14 +49,21 @@ s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
pb = MIN (b->used, digs - ix);
/* setup some aliases */
/* copy of the digit from a used within the nested loop */
tmpx = a->dp[ix];
tmpt = &(t.dp[ix]);
/* an alias for the destination shifted ix places */
tmpt = t.dp + ix;
/* an alias for the digits of b */
tmpy = b->dp;
/* compute the columns of the output and propagate the carry */
for (iy = 0; iy < pb; iy++) {
/* compute the column as a mp_word */
r = ((mp_word) * tmpt) + ((mp_word) tmpx) * ((mp_word) * tmpy++) + ((mp_word) u);
r = ((mp_word) *tmpt) +
((mp_word) tmpx) * ((mp_word) * tmpy++) +
((mp_word) u);
/* the new column is the lower part of the result */
*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
@ -57,8 +71,10 @@ s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* get the carry word from the result */
u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
}
if (ix + iy < digs)
/* set carry if it is placed below digs */
if (ix + iy < digs) {
*tmpt = u;
}
}
mp_clamp (&t);

View File

@ -14,7 +14,7 @@
*/
#include <tommath.h>
/* multiplies |a| * |b| and does not compute the lower digs digits
/* multiplies |a| * |b| and does not compute the lower digs digits
* [meant to get the higher part of the product]
*/
int
@ -28,8 +28,8 @@ s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* can we use the fast multiplier? */
if (((a->used + b->used + 1) < 512)
&& MAX (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
if (((a->used + b->used + 1) < MP_WARRAY)
&& MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
return fast_s_mp_mul_high_digs (a, b, c, digs);
}

View File

@ -14,7 +14,7 @@
*/
#include <tommath.h>
/* low level subtraction (assumes a > b), HAC pp.595 Algorithm 14.9 */
/* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
int
s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
{
@ -34,7 +34,6 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
c->used = max;
/* sub digits from lower part */
{
register mp_digit u, *tmpa, *tmpb, *tmpc;
register int i;
@ -50,12 +49,12 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
/* T[i] = A[i] - B[i] - U */
*tmpc = *tmpa++ - *tmpb++ - u;
/* U = carry bit of T[i]
* Note this saves performing an AND operation since
/* U = carry bit of T[i]
* Note this saves performing an AND operation since
* if a carry does occur it will propagate all the way to the
* MSB. As a result a single shift is required to get the carry
*/
u = *tmpc >> (CHAR_BIT * sizeof (mp_digit) - 1);
u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
/* Clear carry from T[i] */
*tmpc++ &= MP_MASK;
@ -67,7 +66,7 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
*tmpc = *tmpa++ - u;
/* U = carry bit of T[i] */
u = *tmpc >> (CHAR_BIT * sizeof (mp_digit) - 1);
u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
/* Clear carry from T[i] */
*tmpc++ &= MP_MASK;

View File

@ -14,7 +14,15 @@
*/
#include <tommath.h>
/* configured for a AMD Duron Morgan core with etc/tune.c */
int KARATSUBA_MUL_CUTOFF = 73, /* Min. number of digits before Karatsuba multiplication is used. */
KARATSUBA_SQR_CUTOFF = 121, /* Min. number of digits before Karatsuba squaring is used. */
MONTGOMERY_EXPT_CUTOFF = 128; /* max. number of digits that montgomery reductions will help for */
/* Known optimal configurations
CPU /Compiler /MUL CUTOFF/SQR CUTOFF
-------------------------------------------------------------
Intel P4 /GCC v3.2 / 81/ 110
AMD Athlon XP /GCC v3.2 / 109/ 127
*/
/* configured for a AMD XP Thoroughbred core with etc/tune.c */
int KARATSUBA_MUL_CUTOFF = 109, /* Min. number of digits before Karatsuba multiplication is used. */
KARATSUBA_SQR_CUTOFF = 127; /* Min. number of digits before Karatsuba squaring is used. */

261
booker.pl Normal file
View File

@ -0,0 +1,261 @@
#!/bin/perl
#
#Used to prepare the book "tommath.src" for LaTeX by pre-processing it into a .tex file
#
#Essentially you write the "tommath.src" as normal LaTex except where you want code snippets you put
#
#EXAM,file
#
#This preprocessor will then open "file" and insert it as a verbatim copy.
#
#Tom St Denis
# Select the graphics suffix: when the first command-line argument contains
# "PDF" figures are included without an extension, otherwise ".ps" is appended.
if (shift =~ /PDF/) {
   $graph = "";
} else {
   $graph = ".ps";
}

open(IN,"<tommath.src") or die "Can't open source file";
open(OUT,">tommath.tex") or die "Can't open destination file";

# Pass 1: walk the book source once, tracking the current chapter/section/
# sub-section numbers, and remember those numbers for every "MARK,tag" line so
# that ~tag~ references can be resolved in the second pass.
print "Scanning for sections\n";
$chapter = $section = $subsection = 0;
$x = 0;
while (<IN>) {
   # progress indicator: one dot per input line, newline every 80 lines
   print ".";
   if (!(++$x % 80)) { print "\n"; }

   # Update the heading counters.
   #
   # The previous guard here, `if (~($_ =~ /\*/))`, used bitwise negation and
   # was therefore always true, so it has been dropped.  Starred headings such
   # as \chapter*{...} are still not counted because the patterns require the
   # "{" directly after the command name.
   #
   # The literal braces are escaped: an unescaped "{" following a literal
   # character is rejected as a fatal regex error by Perl 5.26 and later.
   if ($_ =~ /\\chapter\{.+\}/) {
      ++$chapter;
      $section = $subsection = 0;
   } elsif ($_ =~ /\\section\{.+\}/) {
      ++$section;
      $subsection = 0;
   } elsif ($_ =~ /\\subsection\{.+\}/) {
      ++$subsection;
   }

   # MARK,tag -- record where the tag was defined
   if ($_ =~ m/MARK/) {
      @m = split(",",$_);
      chomp($m[1]);
      $index1{$m[1]} = $chapter;
      $index2{$m[1]} = $section;
      $index3{$m[1]} = $subsection;
   }
}
close(IN);
# Pass 2: re-read the book source and write tommath.tex, expanding the
# preprocessor directives (MARK, EXAM/LIST, @line,text@, ~tag~, FIGU) as each
# line is encountered.  Every other line is copied through unchanged.
open(IN,"<tommath.src") or die "Can't open source file";
$readline = $wroteline = 0;
$srcline = 0;

# Spelled-out chapter numbers for whole-chapter references, indexed by chapter.
@chapname = ("", "one", "two", "three", "four", "five",
             "six", "seven", "eight", "nine", "ten");

while (<IN>) {
   ++$readline;
   ++$srcline;

   if ($_ =~ m/MARK/) {
      # MARK lines were consumed by the first pass; nothing is emitted for them
   } elsif ($_ =~ m/EXAM/ || $_ =~ m/LIST/) {
      # EXAM,file inserts the file after skipping its header;
      # LIST,file inserts the file in full.
      if ($_ =~ m/EXAM/) {
         $skipheader = 1;
      } else {
         $skipheader = 0;
      }

      # EXAM,file
      chomp($_);
      @m = split(",",$_);
      open(SRC,"<$m[1]") or die "Error:$srcline:Can't open source file $m[1]";

      print "$srcline:Inserting $m[1]:";

      # Start a fresh line cache for this file so that later @line,text@ tags
      # can only match the most recently inserted source, never leftovers of
      # an earlier, longer file.
      @text = ();
      $line = 0;

      # underscores must be escaped for LaTeX in the file name
      $tmp = $m[1];
      $tmp =~ s/_/"\\_"/ge;
      print OUT "\\index{$tmp}\n\\vspace{+3mm}\\begin{small}\n\\hspace{-5.1mm}{\\bf File}: $tmp\n\\vspace{-3mm}\n\\begin{alltt}\n";
      $wroteline += 5;

      if ($skipheader == 1) {
         # skip everything up to and including the line that mentions
         # tommath.h (i.e. the license header); the skipped lines are still
         # cached so the printed line numbers stay relative to the file
         while (<SRC>) {
            $text[$line++] = $_;
            last if ($_ =~ /tommath\.h/);
         }
      }

      $inline = 0;
      while (<SRC>) {
         $text[$line++] = $_;
         ++$inline;
         chomp($_);

         # Escape for the alltt environment.  Braces are first tagged with a
         # "^", backslashes become \symbol{92}, and finally every "^" is
         # turned into a backslash (so "^{" ends up as "\{").
         # NOTE(review): a literal "^" in the inserted source is also turned
         # into a backslash by the last step -- confirm this is acceptable.
         $_ =~ s/\t/" "/ge;
         $_ =~ s/\{/"^{"/ge;
         $_ =~ s/\}/"^}"/ge;
         $_ =~ s/\\/'\symbol{92}'/ge;
         $_ =~ s/\^/"\\"/ge;

         # emit the line number followed by the text, breaking the line after
         # the character at index 75
         printf OUT ("%03d ", $line);
         for ($x = 0; $x < length($_); $x++) {
            print OUT chr(vec($_, $x, 8));
            if ($x == 75) {
               print OUT "\n ";
               ++$wroteline;
            }
         }
         print OUT "\n";
         ++$wroteline;
      }
      $totlines = $line;
      print OUT "\\end{alltt}\n\\end{small}\n";
      close(SRC);
      print "$inline lines\n";
      $wroteline += 2;
   } elsif ($_ =~ m/@\d+,.+@/) {
      # line contains [number,text]
      # e.g. @14,for (ix = 0)@
      # Each tag is replaced by the number of the cached source line nearest
      # to the given number that contains the given text.
      $txt = $_;
      while ($txt =~ m/@\d+,.+@/) {
         @m = split("@",$txt);        # splits into text, one, two
         @parms = split(",",$m[1]);   # splits one,two into two elements

         # now search from $parms[0] down for $parms[1]
         $found1 = 0;
         $found2 = 0;
         for ($i = $parms[0]; $i < $totlines && $found1 == 0; $i++) {
            if ($text[$i] =~ m/\Q$parms[1]\E/) {
               $foundline1 = $i + 1;
               $found1 = 1;
            }
         }

         # now search backwards
         for ($i = $parms[0] - 1; $i >= 0 && $found2 == 0; $i--) {
            if ($text[$i] =~ m/\Q$parms[1]\E/) {
               $foundline2 = $i + 1;
               $found2 = 1;
            }
         }

         # now use the closest match or the first if tied
         if ($found1 == 1 && $found2 == 0) {
            $found = 1;
            $foundline = $foundline1;
         } elsif ($found1 == 0 && $found2 == 1) {
            $found = 1;
            $foundline = $foundline2;
         } elsif ($found1 == 1 && $found2 == 1) {
            $found = 1;
            if (($foundline1 - $parms[0]) <= ($parms[0] - $foundline2)) {
               $foundline = $foundline1;
            } else {
               $foundline = $foundline2;
            }
         } else {
            $found = 0;
         }

         # if found replace
         if ($found == 1) {
            $delta = $parms[0] - $foundline;
            print "Found replacement tag for \"$parms[1]\" on line $srcline which refers to line $foundline (delta $delta)\n";
            $_ =~ s/@\Q$m[1]\E@/$foundline/;
         } else {
            print "ERROR: The tag \"$parms[1]\" on line $srcline was not found in the most recently parsed source!\n";
         }

         # remake the rest of the line
         $cnt = @m;
         $txt = "";
         for ($i = 2; $i < $cnt; $i++) {
            $txt = $txt . $m[$i] . "@";
         }
      }
      print OUT $_;
      ++$wroteline;
   } elsif ($_ =~ /~.+~/) {
      # line contains a ~text~ pair used to refer to indexing :-)
      $txt = $_;
      while ($txt =~ /~.+~/) {
         @m = split("~", $txt);

         # word is the second position
         $word = $m[1];
         $chap   = $index1{$word};
         $sec    = $index2{$word};
         $subsec = $index3{$word};

         # if the chapter is zero the tag wasn't found
         if ($chap == 0) {
            print "ERROR: the tag \"$word\" on line $srcline was not found previously marked.\n";
         } else {
            # format the tag as x, x.y or x.y.z depending on the values
            $str = $chap;
            $str = $str . ".$sec" if ($sec != 0);
            $str = $str . ".$subsec" if ($subsec != 0);

            if ($sec == 0 && $subsec == 0) {
               # it's a chapter: chapters one through ten are spelled out
               # (chapter ten previously fell through and was emitted as a
               # bare "10" because its branch tested for 2 instead of 10)
               if ($chap <= 10) {
                  $str = "chapter " . $chapname[$chap];
               } else {
                  $str = "chapter " . $str;
               }
            } else {
               $str = "section " . $str     if ($sec != 0 && $subsec == 0);
               $str = "sub-section " . $str if ($sec != 0 && $subsec != 0);
            }

            #substitute
            $_ =~ s/~\Q$word\E~/$str/;

            print "Found replacement tag for marker \"$word\" on line $srcline which refers to $str\n";
         }

         # remake rest of the line
         $cnt = @m;
         $txt = "";
         for ($i = 2; $i < $cnt; $i++) {
            $txt = $txt . $m[$i] . "~";
         }
      }
      print OUT $_;
      ++$wroteline;
   } elsif ($_ =~ m/FIGU/) {
      # FIGU,file,caption
      chomp($_);
      @m = split(",", $_);
      print OUT "\\begin{center}\n\\begin{figure}[here]\n\\includegraphics{pics/$m[1]$graph}\n";
      print OUT "\\caption{$m[2]}\n\\end{figure}\n\\end{center}\n";
      # the two prints above emit six lines in total
      $wroteline += 6;
   } else {
      # ordinary line: copy through unchanged
      print OUT $_;
      ++$wroteline;
   }
}
print "Read $readline lines, wrote $wroteline lines\n";

close (OUT);
close (IN);

View File

@ -1,3 +1,37 @@
May 17th, 2003
v0.17 -- Benjamin Goldberg submitted optimized mp_add and mp_sub routines. A new gen.pl as well
as several smaller suggestions. Thanks!
-- removed call to mp_cmp in inner loop of mp_div and put mp_cmp_mag in its place :-)
-- Fixed bug in mp_exptmod that would cause it to fail for odd moduli when DIGIT_BIT != 28
-- mp_exptmod now also returns errors if the modulus is negative and will handle negative exponents
-- mp_prime_is_prime will now return true if the input is one of the primes in the prime table
-- Damian M Gryski (dgryski@uwaterloo.ca) found an index out of bounds error in the
mp_fast_s_mp_mul_high_digs function which didn't come up before. (fixed)
-- Refactored the DR reduction code so there is only one function per file.
-- Fixed bug in the mp_mul() which would erroneously avoid the faster multiplier [comba] when it was
allowed. The bug would not cause the incorrect value to be produced just less efficient (fixed)
-- Fixed similar bug in the Montgomery reduction code.
-- Added tons of (mp_digit) casts so the 7/15/28/31 bit digit code will work flawlessly out of the box.
Also added limited support for 64-bit machines with a 60-bit digit. Both thanks to Tom Wu (tom@arcot.com)
-- Added new comments here and there, cleaned up some code [style stuff]
-- Fixed a lingering typo in mp_exptmod* that would set bitcnt to zero then one. Very silly stuff :-)
-- Fixed up mp_exptmod_fast so it would set "redux" to the comba Montgomery reduction if allowed. This
saves quite a few calls and if statements.
-- Added etc/mont.c a test of the Montgomery reduction [assuming all else works :-| ]
-- Fixed up etc/tune.c to use a wider test range [more appropriate] also added an x86 based addition which
uses RDTSC for high precision timing.
-- Updated demo/demo.c to remove MPI stuff [won't work anyways], made the tests run for 2 seconds each so it's
not so insanely slow. Also made the output space delimited [and fixed up various errors]
-- Added logs directory, logs/graph.dem which will use gnuplot to make a series of PNG files
that go with the pre-made index.html. You have to build [via make timing] and run ltmtest first in the
root of the package.
-- Fixed a bug in mp_sub and mp_add where "-a - -a" or "-a + a" would produce -0 as the result [obviously invalid].
-- Fixed a bug in mp_rshd. If the count == a.used it should zero/return [instead of shifting]
-- Fixed an "off-by-one" bug in mp_mul2d. The initial size check on alloc would be off by one if the residue
shifting caused a carry.
-- Fixed a bug where s_mp_mul_digs() would not call the Comba based routine if allowed. This made Barrett reduction
slower than it had to be.
Mar 29th, 2003
v0.16 -- Sped up mp_div by making normalization one shift call
-- Sped up mp_mul_2d/mp_div_2d by aliasing pointers :-)

View File

@ -1,21 +1,6 @@
#include <time.h>
#ifdef U_MPI
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <limits.h>
#include "mpi.h"
#ifdef _MSC_VER
typedef __int64 ulong64;
#else
typedef unsigned long long ulong64;
#endif
#else
#include "tommath.h"
#endif
#include "tommath.h"
#ifdef TIMER
ulong64 _tt;
@ -23,19 +8,11 @@ void reset(void) { _tt = clock(); }
ulong64 rdtsc(void) { return clock() - _tt; }
#endif
#ifndef DEBUG
int _ifuncs;
#else
extern int _ifuncs;
extern void dump_timings(void);
extern void reset_timings(void);
#endif
void ndraw(mp_int *a, char *name)
{
char buf[4096];
printf("%s: ", name);
mp_toradix(a, buf, 10);
mp_toradix(a, buf, 64);
printf("%s\n", buf);
}
@ -56,31 +33,13 @@ int lbit(void)
lfsr <<= 1;
return 0;
}
}
#ifdef U_MPI
int mp_reduce_setup(mp_int *a, mp_int *b)
{
int res;
mp_set(a, 1);
if ((res = s_mp_lshd(a, b->used * 2)) != MP_OKAY) {
return res;
}
return mp_div(a, b, a, NULL);
}
int mp_rand(mp_int *a, int c)
{
long z = abs(rand()) & 65535;
mp_set(a, z?z:1);
while (c--) {
s_mp_lshd(a, 1);
mp_add_d(a, abs(rand()), a);
}
return MP_OKAY;
}
#endif
#define DO2(x) x; x;
#define DO4(x) DO2(x); DO2(x);
#define DO8(x) DO4(x); DO4(x);
#define DO(x) DO8(x); DO8(x);
char cmd[4096], buf[4096];
int main(void)
@ -89,12 +48,12 @@ int main(void)
unsigned long expt_n, add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, inv_n,
div2_n, mul2_n;
unsigned rr;
int cnt, ix;
int cnt, ix, old_kara_m, old_kara_s;
#ifdef TIMER
int n;
ulong64 tt;
FILE *log;
FILE *log, *logb;
#endif
mp_init(&a);
@ -102,11 +61,11 @@ int main(void)
mp_init(&c);
mp_init(&d);
mp_init(&e);
mp_init(&f);
mp_init(&f);
/* test the DR reduction */
#if 0
srand(time(NULL));
for (cnt = 2; cnt < 32; cnt++) {
printf("%d digit modulus\n", cnt);
@ -117,89 +76,103 @@ int main(void)
}
a.used = cnt;
mp_prime_next_prime(&a, 3);
mp_rand(&b, cnt - 1);
mp_copy(&b, &c);
rr = 0;
do {
if (!(rr & 127)) { printf("%9lu\r", rr); fflush(stdout); }
mp_sqr(&b, &b); mp_add_d(&b, 1, &b);
mp_copy(&b, &c);
mp_mod(&b, &a, &b);
mp_dr_reduce(&c, &a, (1<<DIGIT_BIT)-a.dp[0]);
if (mp_cmp(&b, &c) != MP_EQ) {
printf("Failed on trial %lu\n", rr); exit(-1);
}
} while (++rr < 1000000);
} while (++rr < 1000000);
printf("Passed DR test for %d digits\n", cnt);
}
#endif
#endif
#ifdef TIMER
printf("CLOCKS_PER_SEC == %lu\n", CLOCKS_PER_SEC);
goto sqrtime;
log = fopen("add.log", "w");
for (cnt = 4; cnt <= 128; cnt += 4) {
log = fopen("logs/add.log", "w");
for (cnt = 8; cnt <= 128; cnt += 8) {
mp_rand(&a, cnt);
mp_rand(&b, cnt);
reset();
for (rr = 0; rr < 10000000; rr++) {
mp_add(&a, &b, &c);
}
rr = 0;
do {
DO(mp_add(&a,&b,&c));
rr += 16;
} while (rdtsc() < (CLOCKS_PER_SEC * 2));
tt = rdtsc();
printf("Adding\t\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt, tt);
fprintf(log, "%d,%9llu\n", cnt, (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
}
fclose(log);
log = fopen("sub.log", "w");
for (cnt = 4; cnt <= 128; cnt += 4) {
log = fopen("logs/sub.log", "w");
for (cnt = 8; cnt <= 128; cnt += 8) {
mp_rand(&a, cnt);
mp_rand(&b, cnt);
reset();
for (rr = 0; rr < 10000000; rr++) {
mp_sub(&a, &b, &c);
}
rr = 0;
do {
DO(mp_sub(&a,&b,&c));
rr += 16;
} while (rdtsc() < (CLOCKS_PER_SEC * 2));
tt = rdtsc();
printf("Subtracting\t\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt, tt);
fprintf(log, "%d,%9llu\n", cnt, (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
}
fclose(log);
sqrtime:
log = fopen("sqr.log", "w");
for (cnt = 4; cnt <= 128; cnt += 4) {
mp_rand(&a, cnt);
reset();
for (rr = 0; rr < 250000; rr++) {
mp_sqr(&a, &b);
}
tt = rdtsc();
printf("Squaring\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt, tt);
fprintf(log, "%d,%9llu\n", cnt, (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
}
fclose(log);
log = fopen("mult.log", "w");
for (cnt = 4; cnt <= 128; cnt += 4) {
mp_rand(&a, cnt);
mp_rand(&b, cnt);
reset();
for (rr = 0; rr < 250000; rr++) {
mp_mul(&a, &b, &c);
}
tt = rdtsc();
printf("Multiplying\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt, tt);
fprintf(log, "%d,%9llu\n", cnt, (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
}
fclose(log);
/* do mult/square twice, first without karatsuba and second with */
old_kara_m = KARATSUBA_MUL_CUTOFF;
old_kara_s = KARATSUBA_SQR_CUTOFF;
for (ix = 0; ix < 2; ix++) {
printf("With%s Karatsuba\n", (ix==0)?"out":"");
KARATSUBA_MUL_CUTOFF = (ix==0)?9999:old_kara_m;
KARATSUBA_SQR_CUTOFF = (ix==0)?9999:old_kara_s;
log = fopen((ix==0)?"logs/sqr.log":"logs/sqr_kara.log", "w");
for (cnt = 32; cnt <= 288; cnt += 16) {
mp_rand(&a, cnt);
reset();
rr = 0;
do {
DO(mp_sqr(&a, &b));
rr += 16;
} while (rdtsc() < (CLOCKS_PER_SEC * 2));
tt = rdtsc();
printf("Squaring\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
}
fclose(log);
log = fopen((ix==0)?"logs/mult.log":"logs/mult_kara.log", "w");
for (cnt = 32; cnt <= 288; cnt += 16) {
mp_rand(&a, cnt);
mp_rand(&b, cnt);
reset();
rr = 0;
do {
DO(mp_mul(&a, &b, &c));
rr += 16;
} while (rdtsc() < (CLOCKS_PER_SEC * 2));
tt = rdtsc();
printf("Multiplying\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
}
fclose(log);
}
expttime:
{
char *primes[] = {
/* DR moduli */
@ -210,7 +183,7 @@ expttime:
"542189391331696172661670440619180536749994166415993334151601745392193484590296600979602378676624808129613777993466242203025054573692562689251250471628358318743978285860720148446448885701001277560572526947619392551574490839286458454994488665744991822837769918095117129546414124448777033941223565831420390846864429504774477949153794689948747680362212954278693335653935890352619041936727463717926744868338358149568368643403037768649616778526013610493696186055899318268339432671541328195724261329606699831016666359440874843103020666106568222401047720269951530296879490444224546654729111504346660859907296364097126834834235287147",
"1487259134814709264092032648525971038895865645148901180585340454985524155135260217788758027400478312256339496385275012465661575576202252063145698732079880294664220579764848767704076761853197216563262660046602703973050798218246170835962005598561669706844469447435461092542265792444947706769615695252256130901271870341005768912974433684521436211263358097522726462083917939091760026658925757076733484173202927141441492573799914240222628795405623953109131594523623353044898339481494120112723445689647986475279242446083151413667587008191682564376412347964146113898565886683139407005941383669325997475076910488086663256335689181157957571445067490187939553165903773554290260531009121879044170766615232300936675369451260747671432073394867530820527479172464106442450727640226503746586340279816318821395210726268291535648506190714616083163403189943334431056876038286530365757187367147446004855912033137386225053275419626102417236133948503",
"1095121115716677802856811290392395128588168592409109494900178008967955253005183831872715423151551999734857184538199864469605657805519106717529655044054833197687459782636297255219742994736751541815269727940751860670268774903340296040006114013971309257028332849679096824800250742691718610670812374272414086863715763724622797509437062518082383056050144624962776302147890521249477060215148275163688301275847155316042279405557632639366066847442861422164832655874655824221577849928863023018366835675399949740429332468186340518172487073360822220449055340582568461568645259954873303616953776393853174845132081121976327462740354930744487429617202585015510744298530101547706821590188733515880733527449780963163909830077616357506845523215289297624086914545378511082534229620116563260168494523906566709418166011112754529766183554579321224940951177394088465596712620076240067370589036924024728375076210477267488679008016579588696191194060127319035195370137160936882402244399699172017835144537488486396906144217720028992863941288217185353914991583400421682751000603596655790990815525126154394344641336397793791497068253936771017031980867706707490224041075826337383538651825493679503771934836094655802776331664261631740148281763487765852746577808019633679",
/* generic unrestricted moduli */
"17933601194860113372237070562165128350027320072176844226673287945873370751245439587792371960615073855669274087805055507977323024886880985062002853331424203",
"2893527720709661239493896562339544088620375736490408468011883030469939904368086092336458298221245707898933583190713188177399401852627749210994595974791782790253946539043962213027074922559572312141181787434278708783207966459019479487",
@ -219,9 +192,10 @@ expttime:
"436463808505957768574894870394349739623346440601945961161254440072143298152040105676491048248110146278752857839930515766167441407021501229924721335644557342265864606569000117714935185566842453630868849121480179691838399545644365571106757731317371758557990781880691336695584799313313687287468894148823761785582982549586183756806449017542622267874275103877481475534991201849912222670102069951687572917937634467778042874315463238062009202992087620963771759666448266532858079402669920025224220613419441069718482837399612644978839925207109870840278194042158748845445131729137117098529028886770063736487420613144045836803985635654192482395882603511950547826439092832800532152534003936926017612446606135655146445620623395788978726744728503058670046885876251527122350275750995227",
"11424167473351836398078306042624362277956429440521137061889702611766348760692206243140413411077394583180726863277012016602279290144126785129569474909173584789822341986742719230331946072730319555984484911716797058875905400999504305877245849119687509023232790273637466821052576859232452982061831009770786031785669030271542286603956118755585683996118896215213488875253101894663403069677745948305893849505434201763745232895780711972432011344857521691017896316861403206449421332243658855453435784006517202894181640562433575390821384210960117518650374602256601091379644034244332285065935413233557998331562749140202965844219336298970011513882564935538704289446968322281451907487362046511461221329799897350993370560697505809686438782036235372137015731304779072430260986460269894522159103008260495503005267165927542949439526272736586626709581721032189532726389643625590680105784844246152702670169304203783072275089194754889511973916207",
"1214855636816562637502584060163403830270705000634713483015101384881871978446801224798536155406895823305035467591632531067547890948695117172076954220727075688048751022421198712032848890056357845974246560748347918630050853933697792254955890439720297560693579400297062396904306270145886830719309296352765295712183040773146419022875165382778007040109957609739589875590885701126197906063620133954893216612678838507540777138437797705602453719559017633986486649523611975865005712371194067612263330335590526176087004421363598470302731349138773205901447704682181517904064735636518462452242791676541725292378925568296858010151852326316777511935037531017413910506921922450666933202278489024521263798482237150056835746454842662048692127173834433089016107854491097456725016327709663199738238442164843147132789153725513257167915555162094970853584447993125488607696008169807374736711297007473812256272245489405898470297178738029484459690836250560495461579533254473316340608217876781986188705928270735695752830825527963838355419762516246028680280988020401914551825487349990306976304093109384451438813251211051597392127491464898797406789175453067960072008590614886532333015881171367104445044718144312416815712216611576221546455968770801413440778423979",
NULL
NULL
};
log = fopen("expt.log", "w");
log = fopen("logs/expt.log", "w");
logb = fopen("logs/expt_dr.log", "w");
for (n = 0; primes[n]; n++) {
mp_read_radix(&a, primes[n], 10);
mp_zero(&b);
@ -234,9 +208,11 @@ expttime:
mp_mod(&b, &c, &b);
mp_set(&c, 3);
reset();
for (rr = 0; rr < 50; rr++) {
mp_exptmod(&c, &b, &a, &d);
}
rr = 0;
do {
DO(mp_exptmod(&c, &b, &a, &d));
rr += 16;
} while (rdtsc() < (CLOCKS_PER_SEC * 2));
tt = rdtsc();
mp_sub_d(&a, 1, &e);
mp_sub(&e, &b, &b);
@ -248,25 +224,28 @@ expttime:
exit(0);
}
printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt, tt);
fprintf(log, "%d,%9llu\n", cnt, (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
fprintf((n < 7) ? logb : log, "%d %9llu\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
}
}
}
fclose(log);
fclose(logb);
log = fopen("invmod.log", "w");
log = fopen("logs/invmod.log", "w");
for (cnt = 4; cnt <= 128; cnt += 4) {
mp_rand(&a, cnt);
mp_rand(&b, cnt);
do {
mp_add_d(&b, 1, &b);
mp_gcd(&a, &b, &c);
} while (mp_cmp_d(&c, 1) != MP_EQ);
reset();
for (rr = 0; rr < 10000; rr++) {
mp_invmod(&b, &a, &c);
}
rr = 0;
do {
DO(mp_invmod(&b, &a, &c));
rr += 16;
} while (rdtsc() < (CLOCKS_PER_SEC * 2));
tt = rdtsc();
mp_mulmod(&b, &c, &a, &d);
if (mp_cmp_d(&d, 1) != MP_EQ) {
@ -274,18 +253,18 @@ expttime:
return 0;
}
printf("Inverting mod\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt, tt);
fprintf(log, "%d,%9llu\n", cnt, (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((unsigned long long)rr)*CLOCKS_PER_SEC)/tt);
}
fclose(log);
return 0;
#endif
div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n =
div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n =
sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = 0;
for (;;) {
/* randomly clear and re-init one variable, this has the affect of triming the alloc space */
switch (abs(rand()) % 7) {
case 0: mp_clear(&a); mp_init(&a); break;
@ -296,17 +275,17 @@ expttime:
case 5: mp_clear(&f); mp_init(&f); break;
case 6: break; /* don't clear any */
}
printf("%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu ", add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, expt_n, inv_n, div2_n, mul2_n);
fgets(cmd, 4095, stdin);
cmd[strlen(cmd)-1] = 0;
printf("%s ]\r",cmd); fflush(stdout);
if (!strcmp(cmd, "mul2d")) { ++mul2d_n;
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
if (!strcmp(cmd, "mul2d")) { ++mul2d_n;
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); sscanf(buf, "%d", &rr);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 64);
mp_mul_2d(&a, rr, &a);
a.sign = b.sign;
if (mp_cmp(&a, &b) != MP_EQ) {
@ -315,11 +294,11 @@ expttime:
draw(&b);
return 0;
}
} else if (!strcmp(cmd, "div2d")) { ++div2d_n;
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
} else if (!strcmp(cmd, "div2d")) { ++div2d_n;
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); sscanf(buf, "%d", &rr);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 64);
mp_div_2d(&a, rr, &a, &e);
a.sign = b.sign;
if (a.used == b.used && a.used == 0) { a.sign = b.sign = MP_ZPOS; }
@ -330,19 +309,19 @@ expttime:
return 0;
}
} else if (!strcmp(cmd, "add")) { ++add_n;
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 64);
mp_copy(&a, &d);
mp_add(&d, &b, &d);
if (mp_cmp(&c, &d) != MP_EQ) {
printf("add %lu failure!\n", add_n);
draw(&a);draw(&b);draw(&c);draw(&d);
printf("add %lu failure!\n", add_n);
draw(&a);draw(&b);draw(&c);draw(&d);
return 0;
}
/* test the sign/unsigned storage functions */
rr = mp_signed_bin_size(&c);
mp_to_signed_bin(&c, (unsigned char *)cmd);
memset(cmd+rr, rand()&255, sizeof(cmd)-rr);
@ -353,8 +332,8 @@ draw(&a);draw(&b);draw(&c);draw(&d);
draw(&d);
return 0;
}
rr = mp_unsigned_bin_size(&c);
mp_to_unsigned_bin(&c, (unsigned char *)cmd);
memset(cmd+rr, rand()&255, sizeof(cmd)-rr);
@ -367,90 +346,90 @@ draw(&a);draw(&b);draw(&c);draw(&d);
}
} else if (!strcmp(cmd, "sub")) { ++sub_n;
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 64);
mp_copy(&a, &d);
mp_sub(&d, &b, &d);
if (mp_cmp(&c, &d) != MP_EQ) {
printf("sub %lu failure!\n", sub_n);
draw(&a);draw(&b);draw(&c);draw(&d);
printf("sub %lu failure!\n", sub_n);
draw(&a);draw(&b);draw(&c);draw(&d);
return 0;
}
} else if (!strcmp(cmd, "mul")) { ++mul_n;
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 64);
mp_copy(&a, &d);
mp_mul(&d, &b, &d);
if (mp_cmp(&c, &d) != MP_EQ) {
printf("mul %lu failure!\n", mul_n);
draw(&a);draw(&b);draw(&c);draw(&d);
printf("mul %lu failure!\n", mul_n);
draw(&a);draw(&b);draw(&c);draw(&d);
return 0;
}
} else if (!strcmp(cmd, "div")) { ++div_n;
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&d, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&d, buf, 64);
mp_div(&a, &b, &e, &f);
if (mp_cmp(&c, &e) != MP_EQ || mp_cmp(&d, &f) != MP_EQ) {
printf("div %lu failure!\n", div_n);
printf("div %lu failure!\n", div_n);
draw(&a);draw(&b);draw(&c);draw(&d); draw(&e); draw(&f);
return 0;
}
} else if (!strcmp(cmd, "sqr")) { ++sqr_n;
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 64);
mp_copy(&a, &c);
mp_sqr(&c, &c);
if (mp_cmp(&b, &c) != MP_EQ) {
printf("sqr %lu failure!\n", sqr_n);
printf("sqr %lu failure!\n", sqr_n);
draw(&a);draw(&b);draw(&c);
return 0;
}
} else if (!strcmp(cmd, "gcd")) { ++gcd_n;
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 64);
mp_copy(&a, &d);
mp_gcd(&d, &b, &d);
d.sign = c.sign;
if (mp_cmp(&c, &d) != MP_EQ) {
printf("gcd %lu failure!\n", gcd_n);
printf("gcd %lu failure!\n", gcd_n);
draw(&a);draw(&b);draw(&c);draw(&d);
return 0;
}
} else if (!strcmp(cmd, "lcm")) { ++lcm_n;
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 64);
mp_copy(&a, &d);
mp_lcm(&d, &b, &d);
d.sign = c.sign;
if (mp_cmp(&c, &d) != MP_EQ) {
printf("lcm %lu failure!\n", lcm_n);
printf("lcm %lu failure!\n", lcm_n);
draw(&a);draw(&b);draw(&c);draw(&d);
return 0;
}
} else if (!strcmp(cmd, "expt")) { ++expt_n;
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&d, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&d, buf, 64);
mp_copy(&a, &e);
mp_exptmod(&e, &b, &c, &e);
if (mp_cmp(&d, &e) != MP_EQ) {
printf("expt %lu failure!\n", expt_n);
printf("expt %lu failure!\n", expt_n);
draw(&a);draw(&b);draw(&c);draw(&d); draw(&e);
return 0;
}
} else if (!strcmp(cmd, "invmod")) { ++inv_n;
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&c, buf, 64);
mp_invmod(&a, &b, &d);
mp_mulmod(&d,&a,&b,&e);
if (mp_cmp_d(&e, 1) != MP_EQ) {
@ -460,10 +439,10 @@ draw(&a);draw(&b);draw(&c);draw(&d);
draw(&e);
return 0;
}
} else if (!strcmp(cmd, "div2")) { ++div2_n;
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 64);
mp_div_2(&a, &c);
if (mp_cmp(&c, &b) != MP_EQ) {
printf("div_2 %lu failure\n", div2_n);
@ -473,8 +452,8 @@ draw(&a);draw(&b);draw(&c);draw(&d);
return 0;
}
} else if (!strcmp(cmd, "mul2")) { ++mul2_n;
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 10);
fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 64);
mp_mul_2(&a, &c);
if (mp_cmp(&c, &b) != MP_EQ) {
printf("mul_2 %lu failure\n", mul2_n);
@ -483,9 +462,9 @@ draw(&a);draw(&b);draw(&c);draw(&d);
draw(&c);
return 0;
}
}
}
}
return 0;
return 0;
}

0
demo/test.c Normal file
View File

View File

@ -1,23 +1,40 @@
CFLAGS += -Wall -W -Wshadow -O3 -fomit-frame-pointer -funroll-loops -I../
# default lib name (requires install with root)
# LIBNAME=-ltommath
# libname when you can't install the lib with install
LIBNAME=../libtommath.a
#provable primes
pprime: pprime.o
$(CC) pprime.o $(LIBNAME) -o pprime
# portable [well requires clock()] tuning app
tune: tune.o
$(CC) tune.o $(LIBNAME) -o tune
# same app but using RDTSC for higher precision [requires 80586+], coff based gcc installs [e.g. ming, cygwin, djgpp]
tune86: tune.c
nasm -f coff timer.asm
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86
#make tune86 for linux or any ELF format
tune86l: tune.c
nasm -f elf -DUSE_ELF timer.asm
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86l
# spits out mersenne primes
mersenne: mersenne.o
$(CC) mersenne.o $(LIBNAME) -o mersenne
# finds DR safe primes for the given config
drprime: drprime.o
$(CC) drprime.o $(LIBNAME) -o drprime
mont: mont.o
$(CC) mont.o $(LIBNAME) -o mont
clean:
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont

45
etc/mont.c Normal file
View File

@ -0,0 +1,45 @@
/* tests the montgomery routines */
#include <tommath.h>
int main(void)
{
mp_int modulus, R, p, pp;
mp_digit mp;
long x, y;
mp_init_multi(&modulus, &R, &p, &pp, NULL);
/* loop through various sizes */
for (x = 4; x < 128; x++) {
printf("DIGITS == %3ld...", x); fflush(stdout);
/* make up the odd modulus */
mp_rand(&modulus, x);
modulus.dp[0] |= 1;
/* now find the R value */
mp_montgomery_calc_normalization(&R, &modulus);
mp_montgomery_setup(&modulus, &mp);
/* now run through a bunch tests */
for (y = 0; y < 100000; y++) {
mp_rand(&p, x/2); /* p = random */
mp_mul(&p, &R, &pp); /* pp = R * p */
mp_montgomery_reduce(&pp, &modulus, mp);
/* should be equal to p */
if (mp_cmp(&pp, &p) != MP_EQ) {
printf("FAILURE!\n");
exit(-1);
}
}
printf("PASSED\n");
}
return 0;
}

37
etc/timer.asm Normal file
View File

@ -0,0 +1,37 @@
; x86 timer in NASM
;
; Tom St Denis, tomstdenis@iahu.ca
[bits 32]
[section .data]
time dd 0, 0
[section .text]
; t_start: record the current time-stamp counter in the `time` variable.
; The symbol is exported without a leading underscore when USE_ELF is
; defined and as _t_start otherwise.
%ifdef USE_ELF
[global t_start]
t_start:
%else
[global _t_start]
_t_start:
%endif
; rdtsc overwrites EDX:EAX, so both are saved here and restored before ret
push edx
push eax
rdtsc
; high dword goes to time+0, low dword to time+4 (t_read uses the same layout)
mov [time+0],edx
mov [time+4],eax
pop eax
pop edx
ret
; t_read: leave in EDX:EAX the time-stamp counter minus the value stored by
; t_start. Exported as t_read when USE_ELF is defined, _t_read otherwise.
; NOTE(review): the C side declares this as returning unsigned long long,
; which presumably relies on the 32-bit EDX:EAX return convention -- confirm.
%ifdef USE_ELF
[global t_read]
t_read:
%else
[global _t_read]
_t_read:
%endif
rdtsc
; 64-bit subtract: low dwords first, then high dwords with the borrow
sub eax,[time+4]
sbb edx,[time+0]
ret

View File

@ -5,10 +5,21 @@
#include <tommath.h>
#include <time.h>
clock_t
#ifndef X86_TIMER
/* generic ISO C timer */
unsigned long long __T;
void t_start(void) { __T = clock(); }
unsigned long long t_read(void) { return clock() - __T; }
#else
extern void t_start(void);
extern unsigned long long t_read(void);
#endif
unsigned long long
time_mult (void)
{
clock_t t1;
int x, y;
mp_int a, b, c;
@ -16,137 +27,83 @@ time_mult (void)
mp_init (&b);
mp_init (&c);
t1 = clock ();
for (x = 4; x <= 144; x += 4) {
t_start();
for (x = 32; x <= 288; x += 4) {
mp_rand (&a, x);
mp_rand (&b, x);
for (y = 0; y < 10000; y++) {
for (y = 0; y < 100; y++) {
mp_mul (&a, &b, &c);
}
}
mp_clear (&a);
mp_clear (&b);
mp_clear (&c);
return clock () - t1;
return t_read();
}
clock_t
unsigned long long
time_sqr (void)
{
clock_t t1;
int x, y;
mp_int a, b;
mp_init (&a);
mp_init (&b);
t1 = clock ();
for (x = 4; x <= 144; x += 4) {
t_start();
for (x = 32; x <= 288; x += 4) {
mp_rand (&a, x);
for (y = 0; y < 10000; y++) {
for (y = 0; y < 100; y++) {
mp_sqr (&a, &b);
}
}
mp_clear (&a);
mp_clear (&b);
return clock () - t1;
}
clock_t
time_expt (void)
{
clock_t t1;
int x, y;
mp_int a, b, c, d;
mp_init (&a);
mp_init (&b);
mp_init (&c);
mp_init (&d);
t1 = clock ();
for (x = 4; x <= 144; x += 4) {
mp_rand (&a, x);
mp_rand (&b, x);
mp_rand (&c, x);
if (mp_iseven (&c) != 0) {
mp_add_d (&c, 1, &c);
}
for (y = 0; y < 10; y++) {
mp_exptmod (&a, &b, &c, &d);
}
}
mp_clear (&d);
mp_clear (&c);
mp_clear (&b);
mp_clear (&a);
return clock () - t1;
return t_read();
}
int
main (void)
{
int best_mult, best_square, best_exptmod;
clock_t best, ti;
int best_mult, best_square;
unsigned long long best, ti;
FILE *log;
best_mult = best_square = best_exptmod = 0;
best_mult = best_square = 0;
/* tune multiplication first */
log = fopen ("mult.log", "w");
best = CLOCKS_PER_SEC * 1000;
for (KARATSUBA_MUL_CUTOFF = 8; KARATSUBA_MUL_CUTOFF <= 144; KARATSUBA_MUL_CUTOFF++) {
best = -1;
for (KARATSUBA_MUL_CUTOFF = 8; KARATSUBA_MUL_CUTOFF <= 200; KARATSUBA_MUL_CUTOFF++) {
ti = time_mult ();
printf ("%4d : %9lu\r", KARATSUBA_MUL_CUTOFF, ti);
fprintf (log, "%d, %lu\n", KARATSUBA_MUL_CUTOFF, ti);
printf ("%4d : %9llu\r", KARATSUBA_MUL_CUTOFF, ti);
fprintf (log, "%d, %llu\n", KARATSUBA_MUL_CUTOFF, ti);
fflush (stdout);
if (ti < best) {
printf ("New best: %lu, %d \n", ti, KARATSUBA_MUL_CUTOFF);
printf ("New best: %llu, %d \n", ti, KARATSUBA_MUL_CUTOFF);
best = ti;
best_mult = KARATSUBA_MUL_CUTOFF;
}
}
fclose (log);
/* tune squaring */
log = fopen ("sqr.log", "w");
best = CLOCKS_PER_SEC * 1000;
for (KARATSUBA_SQR_CUTOFF = 8; KARATSUBA_SQR_CUTOFF <= 144; KARATSUBA_SQR_CUTOFF++) {
best = -1;
for (KARATSUBA_SQR_CUTOFF = 8; KARATSUBA_SQR_CUTOFF <= 200; KARATSUBA_SQR_CUTOFF++) {
ti = time_sqr ();
printf ("%4d : %9lu\r", KARATSUBA_SQR_CUTOFF, ti);
fprintf (log, "%d, %lu\n", KARATSUBA_SQR_CUTOFF, ti);
printf ("%4d : %9llu\r", KARATSUBA_SQR_CUTOFF, ti);
fprintf (log, "%d, %llu\n", KARATSUBA_SQR_CUTOFF, ti);
fflush (stdout);
if (ti < best) {
printf ("New best: %lu, %d \n", ti, KARATSUBA_SQR_CUTOFF);
printf ("New best: %llu, %d \n", ti, KARATSUBA_SQR_CUTOFF);
best = ti;
best_square = KARATSUBA_SQR_CUTOFF;
}
}
fclose (log);
/* tune exptmod */
KARATSUBA_MUL_CUTOFF = best_mult;
KARATSUBA_SQR_CUTOFF = best_square;
log = fopen ("expt.log", "w");
best = CLOCKS_PER_SEC * 1000;
for (MONTGOMERY_EXPT_CUTOFF = 8; MONTGOMERY_EXPT_CUTOFF <= 144; MONTGOMERY_EXPT_CUTOFF++) {
ti = time_expt ();
printf ("%4d : %9lu\r", MONTGOMERY_EXPT_CUTOFF, ti);
fflush (stdout);
fprintf (log, "%d : %lu\r", MONTGOMERY_EXPT_CUTOFF, ti);
if (ti < best) {
printf ("New best: %lu, %d\n", ti, MONTGOMERY_EXPT_CUTOFF);
best = ti;
best_exptmod = MONTGOMERY_EXPT_CUTOFF;
}
}
fclose (log);
printf
("\n\n\nKaratsuba Multiplier Cutoff: %d\nKaratsuba Squaring Cutoff: %d\nMontgomery exptmod Cutoff: %d\n",
best_mult, best_square, best_exptmod);
("\n\n\nKaratsuba Multiplier Cutoff: %d\nKaratsuba Squaring Cutoff: %d\n",
best_mult, best_square);
return 0;
}

37
gen.pl
View File

@ -1,27 +1,18 @@
#!/usr/bin/perl
#!/usr/bin/perl -w
#
#Generates a "single file" you can use to quickly add the whole source
#without any makefile troubles
# Generates a "single file" you can use to quickly
# add the whole source without any makefile troubles
#
use strict;
opendir(DIR,".");
@files = readdir(DIR);
closedir(DIR);
open(OUT,">mpi.c");
print OUT "/* File Generated Automatically by gen.pl */\n\n";
for (@files) {
if ($_ =~ /\.c/ && !($_ =~ /mpi\.c/)) {
$fname = $_;
open(SRC,"<$fname");
print OUT "/* Start: $fname */\n";
while (<SRC>) {
print OUT $_;
}
close(SRC);
print OUT "\n/* End: $fname */\n\n";
}
open( OUT, ">mpi.c" ) or die "Couldn't open mpi.c for writing: $!";
foreach my $filename (glob "bn_*.c") {
open( SRC, "<$filename" ) or die "Couldn't open $filename for reading: $!";
print OUT "/* Start: $filename */\n";
print OUT qq[#line 0 "$filename"\n];
print OUT while <SRC>;
print OUT "\n/* End: $filename */\n\n";
close SRC or die "Error closing $filename after reading: $!";
}
print OUT "\n/* EOF */\n";
close(OUT);
print OUT "\b/* EOF */\n";
close OUT or die "Error closing mpi.c after writing: $!";

13
logs/README Normal file
View File

@ -0,0 +1,13 @@
To use the pretty graphs you have to first build/run the ltmtest from the root directory of the package.
To do this, type
make timing ; ltmtest
in the root. It will run for a while [about ten minutes on most PCs] and produce a series of .log files in logs/.
After doing that, run "gnuplot graphs.dem" to make the PNGs. If you have managed to do all that so far, just open index.html to view
them all :-)
Have fun
Tom

16
logs/add.log Normal file
View File

@ -0,0 +1,16 @@
224 11039864
448 9206336
672 8178200
896 7432176
1120 6433264
1344 5847056
1568 5270184
1792 4943416
2016 4520016
2240 4256168
2464 3999224
2688 3714896
2912 3572720
3136 3340176
3360 3222584
3584 3036336

BIN
logs/addsub.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 KiB

7
logs/expt.log Normal file
View File

@ -0,0 +1,7 @@
14364 666
21532 253
28700 117
57372 17
71708 9
86044 5
114716 2

BIN
logs/expt.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

7
logs/expt_dr.log Normal file
View File

@ -0,0 +1,7 @@
14896 1088
21952 468
29008 244
43120 91
58016 43
86240 15
115248 6

17
logs/graphs.dem Normal file
View File

@ -0,0 +1,17 @@
set terminal png color
set size 1.5
set ylabel "Operations per Second"
set xlabel "Operand size (bits)"
set output "addsub.png"
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
set output "mult.png"
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
set output "expt.png"
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)"
set output "invmod.png"
plot 'invmod.log' smooth bezier title "Modular Inverse"

24
logs/index.html Normal file
View File

@ -0,0 +1,24 @@
<html>
<head>
<title>LibTomMath Log Plots</title>
</head>
<body>
<h1>Addition and Subtraction</h1>
<center><img src=addsub.png></center>
<hr>
<h1>Multipliers</h1>
<center><img src=mult.png></center>
<hr>
<h1>Exptmod</h1>
<center><img src=expt.png></center>
<hr>
<h1>Modular Inverse</h1>
<center><img src=invmod.png></center>
<hr>
</body>
</html>

32
logs/invmod.log Normal file
View File

@ -0,0 +1,32 @@
112 15608
224 7840
336 5104
448 3376
560 2616
672 1984
784 1640
896 2056
1008 1136
1120 936
1232 1240
1344 1112
1456 608
1568 873
1680 492
1792 444
1904 640
2016 584
2128 328
2240 307
2352 283
2464 256
2576 393
2688 365
2800 344
2912 196
3024 301
3136 170
3248 160
3360 250
3472 144
3584 224

BIN
logs/invmod.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

17
logs/mult.log Normal file
View File

@ -0,0 +1,17 @@
896 321504
1344 150784
1792 90288
2240 59760
2688 42480
3136 32056
3584 24600
4032 19656
4480 16024
4928 13328
5376 11280
5824 9624
6272 8336
6720 7280
7168 1648
7616 1464
8064 1296

BIN
logs/mult.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.9 KiB

17
logs/mult_kara.log Normal file
View File

@ -0,0 +1,17 @@
896 321928
1344 150752
1792 90136
2240 59888
2688 42480
3136 32080
3584 25744
4032 21216
4480 17912
4928 14896
5376 12936
5824 11216
6272 9848
6720 8896
7168 7968
7616 7248
8064 6600

17
logs/sqr.log Normal file
View File

@ -0,0 +1,17 @@
896 416968
1344 223672
1792 141552
2240 97280
2688 71304
3136 54648
3584 16264
4032 13000
4480 10528
4928 8776
5376 7464
5824 6440
6272 5520
6720 4808
7168 4264
7616 3784
8064 3368

17
logs/sqr_kara.log Normal file
View File

@ -0,0 +1,17 @@
896 416656
1344 223728
1792 141288
2240 97456
2688 71152
3136 54392
3584 38552
4032 32216
4480 27384
4928 23792
5376 20728
5824 18232
6272 16160
6720 14408
7168 11696
7616 10768
8064 9920

16
logs/sub.log Normal file
View File

@ -0,0 +1,16 @@
224 9862520
448 8562344
672 7661400
896 6838128
1120 5911144
1344 5394040
1568 4993760
1792 4624240
2016 4332024
2240 4029312
2464 3790784
2688 3587216
2912 3397952
3136 3239736
3360 3080616
3584 2933104

View File

@ -1,6 +1,6 @@
CFLAGS += -I./ -Wall -W -Wshadow -O3 -fomit-frame-pointer -funroll-loops
VERSION=0.16
VERSION=0.17
default: libtommath.a
@ -32,7 +32,8 @@ bn_mp_count_bits.o bn_mp_read_unsigned_bin.o bn_mp_read_signed_bin.o bn_mp_to_un
bn_mp_to_signed_bin.o bn_mp_unsigned_bin_size.o bn_mp_signed_bin_size.o bn_radix.o \
bn_mp_xor.o bn_mp_and.o bn_mp_or.o bn_mp_rand.o bn_mp_montgomery_calc_normalization.o \
bn_mp_prime_is_divisible.o bn_prime_tab.o bn_mp_prime_fermat.o bn_mp_prime_miller_rabin.o \
bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o
bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o bn_mp_multi.o \
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o
libtommath.a: $(OBJECTS)
$(AR) $(ARFLAGS) libtommath.a $(OBJECTS)
@ -52,21 +53,46 @@ test: libtommath.a demo/demo.o
timing: libtommath.a
$(CC) $(CFLAGS) -DTIMER demo/demo.c libtommath.a -o ltmtest -s
$(CC) $(CFLAGS) -DTIMER -DU_MPI -I./mtest/ demo/demo.c mtest/mpi.c -o mpitest -s
docdvi: bn.tex
latex bn
# makes the LTM book DVI file, requires tetex, perl and makeindex [part of tetex I think]
docdvi: tommath.src
cd pics ; make
echo "hello" > tommath.ind
perl booker.pl
latex tommath > /dev/null
makeindex tommath
latex tommath > /dev/null
# makes the LTM book PS/PDF file, requires tetex, cleans up the LaTeX temp files
docs:
cd pics ; make pdfes
echo "hello" > tommath.ind
perl booker.pl
latex tommath > /dev/null
makeindex tommath
latex tommath > /dev/null
dvips -tB5 -D600 tommath
echo "hello" > tommath.ind
perl booker.pl PDF
latex tommath > /dev/null
makeindex tommath
latex tommath > /dev/null
pdflatex tommath
rm -f tommath.log tommath.aux tommath.dvi tommath.idx tommath.toc tommath.lof tommath.ind tommath.ilg
docs: docdvi
#the old manual being phased out
manual:
latex bn
pdflatex bn
rm -f bn.log bn.aux bn.dvi
rm -f bn.aux bn.dvi bn.log
clean:
rm -f *.pdf *.o *.a *.obj *.lib *.exe etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \
bn.log bn.aux bn.dvi *.log *.s mpi.c
tommath.idx tommath.toc tommath.log tommath.aux tommath.dvi tommath.lof tommath.ind tommath.ilg *.ps *.pdf *.log *.s mpi.c
cd etc ; make clean
cd pics ; make clean
zipup: clean docs
zipup: clean manual
perl gen.pl ; mv mpi.c pre_gen/ ; \
cd .. ; rm -rf ltm* libtommath-$(VERSION) ; mkdir libtommath-$(VERSION) ; \
cp -R ./libtommath/* ./libtommath-$(VERSION)/ ; tar -c libtommath-$(VERSION)/* > ltm-$(VERSION).tar ; \

View File

@ -22,7 +22,8 @@ bn_mp_count_bits.obj bn_mp_read_unsigned_bin.obj bn_mp_read_signed_bin.obj bn_mp
bn_mp_to_signed_bin.obj bn_mp_unsigned_bin_size.obj bn_mp_signed_bin_size.obj bn_radix.obj \
bn_mp_xor.obj bn_mp_and.obj bn_mp_or.obj bn_mp_rand.obj bn_mp_montgomery_calc_normalization.obj \
bn_mp_prime_is_divisible.obj bn_prime_tab.obj bn_mp_prime_fermat.obj bn_mp_prime_miller_rabin.obj \
bn_mp_prime_is_prime.obj bn_mp_prime_next_prime.obj bn_mp_dr_reduce.obj
bn_mp_prime_is_prime.obj bn_mp_prime_next_prime.obj bn_mp_dr_reduce.obj bn_mp_multi.obj \
bn_mp_dr_is_modulus.obj bn_mp_dr_setup.obj
library: $(OBJECTS)

View File

@ -10,7 +10,7 @@ result1
result2
[... resultN]
So for example "a * b mod n" would be
So for example "a * b mod n" would be
mulmod
a
@ -18,7 +18,7 @@ b
n
a*b mod n
e.g. if a=3, b=4 n=11 then
e.g. if a=3, b=4 n=11 then
mulmod
3
@ -38,10 +38,10 @@ FILE *rng;
void rand_num(mp_int *a)
{
int n, size;
unsigned char buf[512];
unsigned char buf[2048];
top:
size = 1 + ((fgetc(rng)*fgetc(rng)) % 96);
size = 1 + ((fgetc(rng)*fgetc(rng)) % 1024);
buf[0] = (fgetc(rng)&1)?1:0;
fread(buf+1, 1, size, rng);
for (n = 0; n < size; n++) {
@ -54,7 +54,7 @@ top:
void rand_num2(mp_int *a)
{
int n, size;
unsigned char buf[512];
unsigned char buf[2048];
top:
size = 1 + ((fgetc(rng)*fgetc(rng)) % 96);
@ -67,18 +67,38 @@ top:
mp_read_raw(a, buf, 1+size);
}
#define mp_to64(a, b) mp_toradix(a, b, 64)
int main(void)
{
int n;
mp_int a, b, c, d, e;
char buf[4096];
mp_init(&a);
mp_init(&b);
mp_init(&c);
mp_init(&d);
mp_init(&e);
/* initial (2^n - 1)^2 testing, makes sure the comba multiplier works [it has the new carry code] */
/*
mp_set(&a, 1);
for (n = 1; n < 8192; n++) {
mp_mul(&a, &a, &c);
printf("mul\n");
mp_to64(&a, buf);
printf("%s\n%s\n", buf, buf);
mp_to64(&c, buf);
printf("%s\n", buf);
mp_add_d(&a, 1, &a);
mp_mul_2(&a, &a);
mp_sub_d(&a, 1, &a);
}
*/
rng = fopen("/dev/urandom", "rb");
if (rng == NULL) {
rng = fopen("/dev/random", "rb");
@ -97,11 +117,11 @@ int main(void)
rand_num(&b);
mp_add(&a, &b, &c);
printf("add\n");
mp_todecimal(&a, buf);
mp_to64(&a, buf);
printf("%s\n", buf);
mp_todecimal(&b, buf);
mp_to64(&b, buf);
printf("%s\n", buf);
mp_todecimal(&c, buf);
mp_to64(&c, buf);
printf("%s\n", buf);
} else if (n == 1) {
/* sub tests */
@ -109,11 +129,11 @@ int main(void)
rand_num(&b);
mp_sub(&a, &b, &c);
printf("sub\n");
mp_todecimal(&a, buf);
mp_to64(&a, buf);
printf("%s\n", buf);
mp_todecimal(&b, buf);
mp_to64(&b, buf);
printf("%s\n", buf);
mp_todecimal(&c, buf);
mp_to64(&c, buf);
printf("%s\n", buf);
} else if (n == 2) {
/* mul tests */
@ -121,11 +141,11 @@ int main(void)
rand_num(&b);
mp_mul(&a, &b, &c);
printf("mul\n");
mp_todecimal(&a, buf);
mp_to64(&a, buf);
printf("%s\n", buf);
mp_todecimal(&b, buf);
mp_to64(&b, buf);
printf("%s\n", buf);
mp_todecimal(&c, buf);
mp_to64(&c, buf);
printf("%s\n", buf);
} else if (n == 3) {
/* div tests */
@ -133,22 +153,22 @@ int main(void)
rand_num(&b);
mp_div(&a, &b, &c, &d);
printf("div\n");
mp_todecimal(&a, buf);
mp_to64(&a, buf);
printf("%s\n", buf);
mp_todecimal(&b, buf);
mp_to64(&b, buf);
printf("%s\n", buf);
mp_todecimal(&c, buf);
mp_to64(&c, buf);
printf("%s\n", buf);
mp_todecimal(&d, buf);
mp_to64(&d, buf);
printf("%s\n", buf);
} else if (n == 4) {
/* sqr tests */
rand_num(&a);
mp_sqr(&a, &b);
printf("sqr\n");
mp_todecimal(&a, buf);
mp_to64(&a, buf);
printf("%s\n", buf);
mp_todecimal(&b, buf);
mp_to64(&b, buf);
printf("%s\n", buf);
} else if (n == 5) {
/* mul_2d test */
@ -156,11 +176,11 @@ int main(void)
mp_copy(&a, &b);
n = fgetc(rng) & 63;
mp_mul_2d(&b, n, &b);
mp_todecimal(&a, buf);
mp_to64(&a, buf);
printf("mul2d\n");
printf("%s\n", buf);
printf("%d\n", n);
mp_todecimal(&b, buf);
mp_to64(&b, buf);
printf("%s\n", buf);
} else if (n == 6) {
/* div_2d test */
@ -168,11 +188,11 @@ int main(void)
mp_copy(&a, &b);
n = fgetc(rng) & 63;
mp_div_2d(&b, n, &b, NULL);
mp_todecimal(&a, buf);
mp_to64(&a, buf);
printf("div2d\n");
printf("%s\n", buf);
printf("%d\n", n);
mp_todecimal(&b, buf);
mp_to64(&b, buf);
printf("%s\n", buf);
} else if (n == 7) {
/* gcd test */
@ -182,12 +202,12 @@ int main(void)
b.sign = MP_ZPOS;
mp_gcd(&a, &b, &c);
printf("gcd\n");
mp_todecimal(&a, buf);
printf("%s\n", buf);
mp_todecimal(&b, buf);
printf("%s\n", buf);
mp_todecimal(&c, buf);
printf("%s\n", buf);
mp_to64(&a, buf);
printf("%s\n", buf);
mp_to64(&b, buf);
printf("%s\n", buf);
mp_to64(&c, buf);
printf("%s\n", buf);
} else if (n == 8) {
/* lcm test */
rand_num(&a);
@ -196,12 +216,12 @@ int main(void)
b.sign = MP_ZPOS;
mp_lcm(&a, &b, &c);
printf("lcm\n");
mp_todecimal(&a, buf);
printf("%s\n", buf);
mp_todecimal(&b, buf);
printf("%s\n", buf);
mp_todecimal(&c, buf);
printf("%s\n", buf);
mp_to64(&a, buf);
printf("%s\n", buf);
mp_to64(&b, buf);
printf("%s\n", buf);
mp_to64(&c, buf);
printf("%s\n", buf);
} else if (n == 9) {
/* exptmod test */
rand_num2(&a);
@ -210,14 +230,14 @@ int main(void)
a.sign = b.sign = c.sign = 0;
mp_exptmod(&a, &b, &c, &d);
printf("expt\n");
mp_todecimal(&a, buf);
printf("%s\n", buf);
mp_todecimal(&b, buf);
printf("%s\n", buf);
mp_todecimal(&c, buf);
printf("%s\n", buf);
mp_todecimal(&d, buf);
printf("%s\n", buf);
mp_to64(&a, buf);
printf("%s\n", buf);
mp_to64(&b, buf);
printf("%s\n", buf);
mp_to64(&c, buf);
printf("%s\n", buf);
mp_to64(&d, buf);
printf("%s\n", buf);
} else if (n == 10) {
/* invmod test */
rand_num2(&a);
@ -229,28 +249,28 @@ int main(void)
if (mp_cmp_d(&b, 1) == 0) continue;
mp_invmod(&a, &b, &c);
printf("invmod\n");
mp_todecimal(&a, buf);
printf("%s\n", buf);
mp_todecimal(&b, buf);
printf("%s\n", buf);
mp_todecimal(&c, buf);
printf("%s\n", buf);
mp_to64(&a, buf);
printf("%s\n", buf);
mp_to64(&b, buf);
printf("%s\n", buf);
mp_to64(&c, buf);
printf("%s\n", buf);
} else if (n == 11) {
rand_num(&a);
mp_mul_2(&a, &a);
mp_div_2(&a, &b);
printf("div2\n");
mp_todecimal(&a, buf);
printf("%s\n", buf);
mp_todecimal(&b, buf);
mp_to64(&a, buf);
printf("%s\n", buf);
mp_to64(&b, buf);
printf("%s\n", buf);
} else if (n == 12) {
rand_num2(&a);
mp_mul_2(&a, &b);
printf("mul2\n");
mp_todecimal(&a, buf);
printf("%s\n", buf);
mp_todecimal(&b, buf);
mp_to64(&a, buf);
printf("%s\n", buf);
mp_to64(&b, buf);
printf("%s\n", buf);
}
}

17
pics/makefile Normal file
View File

@ -0,0 +1,17 @@
# makes the images... yeah
# These targets name actions, not files; declare them phony so a stray file
# called "clean" or "pses" cannot mask them.
.PHONY: default pses pdfes clean

default: pses

# The source image is committed as sliding_window.TIF (upper-case extension);
# the prerequisite must match that spelling on case-sensitive filesystems.
sliding_window.ps: sliding_window.TIF
	tiff2ps -c -e sliding_window.TIF > sliding_window.ps

sliding_window.pdf: sliding_window.ps
	epstopdf sliding_window.ps

# PostScript figures (used by the DVI/PS build of the book)
pses: sliding_window.ps

# PDF figures (used by the pdflatex build of the book)
pdfes: sliding_window.pdf

clean:
	rm -rf *.ps *.pdf .xvpics

BIN
pics/sliding_window.TIF Normal file

Binary file not shown.

BIN
pics/sliding_window.sxd Normal file

Binary file not shown.

File diff suppressed because it is too large Load Diff

113
tommath.h
View File

@ -1,11 +1,11 @@
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is library that provides for multiple-precision
* LibTomMath is a library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
@ -34,18 +34,18 @@ extern "C" {
#else
/* C on the other hand dosen't care */
#define OPT_CAST
/* C on the other hand doesn't care */
#define OPT_CAST
#endif
/* some default configurations.
/* some default configurations.
*
* A "mp_digit" must be able to hold DIGIT_BIT + 1 bits
* A "mp_word" must be able to hold 2*DIGIT_BIT + 1 bits
* A "mp_digit" must be able to hold DIGIT_BIT + 1 bits
* A "mp_word" must be able to hold 2*DIGIT_BIT + 1 bits
*
* At the very least a mp_digit must be able to hold 7 bits
* [any size beyond that is ok provided it overflow the data type]
* At the very least a mp_digit must be able to hold 7 bits
* [any size beyond that is ok provided it doesn't overflow the data type]
*/
#ifdef MP_8BIT
typedef unsigned char mp_digit;
@ -53,7 +53,21 @@ extern "C" {
#elif defined(MP_16BIT)
typedef unsigned short mp_digit;
typedef unsigned long mp_word;
#elif defined(MP_64BIT)
/* for GCC only on supported platforms */
#ifndef CRYPT
typedef unsigned long long ulong64;
typedef signed long long long64;
#endif
typedef ulong64 mp_digit;
typedef unsigned long mp_word __attribute__ ((mode(TI)));
#define DIGIT_BIT 60
#else
/* this is the default case, 28-bit digits */
/* this is to make porting into LibTomCrypt easier :-) */
#ifndef CRYPT
#ifdef _MSC_VER
typedef unsigned __int64 ulong64;
@ -61,23 +75,24 @@ extern "C" {
#else
typedef unsigned long long ulong64;
typedef signed long long long64;
#endif
#endif
#endif
#endif
/* default case */
typedef unsigned long mp_digit;
typedef ulong64 mp_word;
#define DIGIT_BIT 28
#endif
#define DIGIT_BIT 28
#endif
/* otherwise the bits per digit is calculated automatically from the size of a mp_digit */
#ifndef DIGIT_BIT
#define DIGIT_BIT ((CHAR_BIT * sizeof(mp_digit) - 1)) /* bits per digit */
#endif
#define MP_DIGIT_BIT DIGIT_BIT
#define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
#define MP_DIGIT_MAX MP_MASK
#define MP_DIGIT_MAX MP_MASK
/* equalities */
#define MP_LT -1 /* less than */
@ -99,7 +114,14 @@ extern int KARATSUBA_MUL_CUTOFF,
KARATSUBA_SQR_CUTOFF,
MONTGOMERY_EXPT_CUTOFF;
#define MP_PREC 64 /* default digits of precision */
/* various build options */
#define MP_PREC 64 /* default digits of precision (must be power of two) */
/* define this to use lower memory usage routines (exptmods mostly) */
/* #define MP_LOW_MEM */
/* size of comba arrays, should be at least 2 * 2**(BITS_PER_WORD - BITS_PER_DIGIT*2) */
#define MP_WARRAY (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
typedef struct {
int used, alloc, sign;
@ -118,6 +140,12 @@ int mp_init(mp_int *a);
/* free a bignum */
void mp_clear(mp_int *a);
/* init a null terminated series of arguments */
int mp_init_multi(mp_int *mp, ...);
/* clear a null terminated series of arguments */
void mp_clear_multi(mp_int *mp, ...);
/* exchange two ints */
void mp_exch(mp_int *a, mp_int *b);
@ -143,7 +171,7 @@ void mp_zero(mp_int *a);
void mp_set(mp_int *a, mp_digit b);
/* set a 32-bit const */
int mp_set_int(mp_int *a, unsigned long b);
int mp_set_int(mp_int *a, unsigned int b);
/* copy, b = a */
int mp_copy(mp_int *a, mp_int *b);
@ -162,22 +190,22 @@ void mp_rshd(mp_int *a, int b);
/* left shift by "b" digits */
int mp_lshd(mp_int *a, int b);
/* c = a / 2^b */
/* c = a / 2**b */
int mp_div_2d(mp_int *a, int b, mp_int *c, mp_int *d);
/* b = a/2 */
int mp_div_2(mp_int *a, mp_int *b);
/* c = a * 2^b */
/* c = a * 2**b */
int mp_mul_2d(mp_int *a, int b, mp_int *c);
/* b = a*2 */
int mp_mul_2(mp_int *a, mp_int *b);
/* c = a mod 2^d */
/* c = a mod 2**d */
int mp_mod_2d(mp_int *a, int b, mp_int *c);
/* computes a = 2^b */
/* computes a = 2**b */
int mp_2expt(mp_int *a, int b);
/* makes a pseudo-random int of a given size */
@ -216,7 +244,7 @@ int mp_sub(mp_int *a, mp_int *b, mp_int *c);
/* c = a * b */
int mp_mul(mp_int *a, mp_int *b, mp_int *c);
/* b = a^2 */
/* b = a*a */
int mp_sqr(mp_int *a, mp_int *b);
/* a/b => cb + d == a */
@ -242,7 +270,7 @@ int mp_mul_d(mp_int *a, mp_digit b, mp_int *c);
/* a/b => cb + d == a */
int mp_div_d(mp_int *a, mp_digit b, mp_int *c, mp_digit *d);
/* c = a^b */
/* c = a**b */
int mp_expt_d(mp_int *a, mp_digit b, mp_int *c);
/* c = a mod b, 0 <= c < b */
@ -271,7 +299,7 @@ int mp_gcd(mp_int *a, mp_int *b, mp_int *c);
/* c = [a, b] or (a*b)/(a, b) */
int mp_lcm(mp_int *a, mp_int *b, mp_int *c);
/* finds one of the b'th root of a, such that |c|^b <= |a|
/* finds one of the b'th root of a, such that |c|**b <= |a|
*
* returns error if a < 0 and b is even
*/
@ -288,7 +316,7 @@ int mp_reduce_setup(mp_int *a, mp_int *b);
/* Barrett Reduction, computes a (mod b) with a precomputed value c
*
* Assumes that 0 < a <= b^2, note if 0 > a > -(b^2) then you can merely
* Assumes that 0 < a <= b*b, note if 0 > a > -(b*b) then you can merely
* compute the reduction as -1 * mp_reduce(mp_abs(a)) [pseudo code].
*/
int mp_reduce(mp_int *a, mp_int *b, mp_int *c);
@ -296,12 +324,12 @@ int mp_reduce(mp_int *a, mp_int *b, mp_int *c);
/* sets up the montgomery reduction */
int mp_montgomery_setup(mp_int *a, mp_digit *mp);
/* computes a = B^n mod b without division or multiplication useful for
/* computes a = B**n mod b without division or multiplication useful for
* normalizing numbers in a Montgomery system.
*/
int mp_montgomery_calc_normalization(mp_int *a, mp_int *b);
/* computes xR^-1 == x (mod N) via Montgomery Reduction */
/* computes x/R == x (mod N) via Montgomery Reduction */
int mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp);
/* returns 1 if a is a valid DR modulus */
@ -313,32 +341,38 @@ void mp_dr_setup(mp_int *a, mp_digit *d);
/* reduces a modulo b using the Diminished Radix method */
int mp_dr_reduce(mp_int *a, mp_int *b, mp_digit mp);
/* d = a^b (mod c) */
/* d = a**b (mod c) */
int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
/* ---> Primes <--- */
#define PRIME_SIZE 256 /* number of primes */
/* table of first 256 primes */
/* number of primes */
#ifdef MP_8BIT
#define PRIME_SIZE 31
#else
#define PRIME_SIZE 256
#endif
/* table of first PRIME_SIZE primes */
extern const mp_digit __prime_tab[];
/* result=1 if a is divisible by one of the first 256 primes */
/* result=1 if a is divisible by one of the first PRIME_SIZE primes */
int mp_prime_is_divisible(mp_int *a, int *result);
/* performs one Fermat test of "a" using base "b".
* Sets result to 0 if composite or 1 if probable prime
/* performs one Fermat test of "a" using base "b".
* Sets result to 0 if composite or 1 if probable prime
*/
int mp_prime_fermat(mp_int *a, mp_int *b, int *result);
/* performs one Miller-Rabin test of "a" using base "b".
* Sets result to 0 if composite or 1 if probable prime
* Sets result to 0 if composite or 1 if probable prime
*/
int mp_prime_miller_rabin(mp_int *a, mp_int *b, int *result);
/* performs t rounds of Miller-Rabin on "a" using the first
* t prime bases. Also performs an initial sieve of trial
* division. Determines if "a" is prime with probability
* of error no more than (1/4)^t.
* of error no more than (1/4)**t.
*
* Sets result to 1 if probably prime, 0 otherwise
*/
@ -365,6 +399,9 @@ int mp_read_radix(mp_int *a, char *str, int radix);
int mp_toradix(mp_int *a, char *str, int radix);
int mp_radix_size(mp_int *a, int radix);
int mp_fread(mp_int *a, int radix, FILE *stream);
int mp_fwrite(mp_int *a, int radix, FILE *stream);
#define mp_read_raw(mp, str, len) mp_read_signed_bin((mp), (str), (len))
#define mp_raw_size(mp) mp_signed_bin_size(mp)
#define mp_toraw(mp, str) mp_to_signed_bin((mp), (str))

2459
tommath.src Normal file

File diff suppressed because it is too large Load Diff

4195
tommath.tex Normal file

File diff suppressed because it is too large Load Diff