diff --git a/bn.pdf b/bn.pdf index 8eac8d8..a6c3d07 100644 Binary files a/bn.pdf and b/bn.pdf differ diff --git a/bn.tex b/bn.tex index d243a70..9beddda 100644 --- a/bn.tex +++ b/bn.tex @@ -1,7 +1,7 @@ \documentclass[]{article} \begin{document} -\title{LibTomMath v0.23 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org } +\title{LibTomMath v0.24 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org } \author{Tom St Denis \\ tomstdenis@iahu.ca} \maketitle \newpage @@ -389,7 +389,7 @@ int mp_prime_is_prime(mp_int *a, int t, int *result); /* finds the next prime after the number "a" using "t" trials * of Miller-Rabin. */ -int mp_prime_next_prime(mp_int *a, int t); +int mp_prime_next_prime(mp_int *a, int t, int bbs_style); \end{verbatim} \subsection{Radix Conversions} @@ -832,9 +832,11 @@ primes and then $t$ rounds of Miller-Rabin using the first $t$ primes as bases. function will always set $result$ to $1$. If $a$ is composite then it will almost always set $result$ to $0$. The probability of error is given in figure two. -\subsubsection{mp\_prime\_next\_prime(mp\_int *a, int t)} +\subsubsection{mp\_prime\_next\_prime(mp\_int *a, int t, int bbs\_style)} This function will find the next prime \textbf{after} $a$ by using trial division and $t$ trials of -Miller-Rabin. +Miller-Rabin. If $bbs\_style$ is set to one than $a$ will be the next prime such that $a \equiv 3 \mbox{ (mod }4\mbox{)}$ +which is useful for certain algorithms. Otherwise, $a$ will be the next prime greater than the initial input +value and may be $\lbrace 1, 3 \rbrace \equiv a \mbox{ (mod }4\mbox{)}$. \section{Timing Analysis} diff --git a/bn_mp_add_d.c b/bn_mp_add_d.c index 66d1f07..9703ad3 100644 --- a/bn_mp_add_d.c +++ b/bn_mp_add_d.c @@ -18,15 +18,84 @@ int mp_add_d (mp_int * a, mp_digit b, mp_int * c) { - mp_int t; - int res; + int res, ix, oldused; + mp_digit *tmpa, *tmpc, mu; - if ((res = mp_init_size(&t, 1)) != MP_OKAY) { - return res; + /* grow c as required */ + if (c->alloc < a->used + 1) { + if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { + return res; + } } - mp_set (&t, b); - res = mp_add (a, &t, c); - mp_clear (&t); - return res; + /* if a is negative and |a| >= b, call c = |a| - b */ + if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) { + /* temporarily fix sign of a */ + a->sign = MP_ZPOS; + + /* c = |a| - b */ + res = mp_sub_d(a, b, c); + + /* fix sign */ + a->sign = c->sign = MP_NEG; + + return res; + } + + + /* old number of used digits in c */ + oldused = c->used; + + /* sign always positive */ + c->sign = MP_ZPOS; + + /* source alias */ + tmpa = a->dp; + + /* destination alias */ + tmpc = c->dp; + + /* if a is positive */ + if (a->sign == MP_ZPOS) { + /* setup size */ + c->used = a->used + 1; + + /* add digit, after this we're propagating + * the carry. + */ + *tmpc = *tmpa++ + b; + mu = *tmpc >> DIGIT_BIT; + *tmpc++ &= MP_MASK; + + /* now handle rest of the digits */ + for (ix = 1; ix < a->used; ix++) { + *tmpc = *tmpa++ + mu; + mu = *tmpc >> DIGIT_BIT; + *tmpc++ &= MP_MASK; + } + /* set final carry */ + ix++; + *tmpc++ = mu; + + } else { + /* a was negative and |a| < b */ + c->used = 1; + + /* the result is a single digit */ + *tmpc++ = b - a->dp[0]; + + /* setup count so the clearing of oldused + * can fall through correctly + */ + ix = 1; + } + + /* now zero to oldused */ + while (ix++ < oldused) { + *tmpc++ = 0; + } + mp_clamp(c); + + return MP_OKAY; } + diff --git a/bn_mp_gcd.c b/bn_mp_gcd.c index ff6ba70..b73657c 100644 --- a/bn_mp_gcd.c +++ b/bn_mp_gcd.c @@ -29,7 +29,7 @@ mp_gcd (mp_int * a, mp_int * b, mp_int * c) return mp_copy (a, c); } if (mp_iszero (a) == 1 && mp_iszero (b) == 1) { - mp_set (c, 1); + mp_zero(c); return MP_OKAY; } diff --git a/bn_mp_prime_miller_rabin.c b/bn_mp_prime_miller_rabin.c index f68ba75..c85136b 100644 --- a/bn_mp_prime_miller_rabin.c +++ b/bn_mp_prime_miller_rabin.c @@ -53,7 +53,7 @@ mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result) */ s = mp_cnt_lsb(&r); - /* now divide n - 1 by 2^s */ + /* now divide n - 1 by 2**s */ if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) { goto __R; } diff --git a/bn_mp_prime_next_prime.c b/bn_mp_prime_next_prime.c index 0dde42a..2072ff2 100644 --- a/bn_mp_prime_next_prime.c +++ b/bn_mp_prime_next_prime.c @@ -112,7 +112,7 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style) /* y == 1 if any residue was zero [e.g. cannot be prime] */ y = 0; - /* increase step to next odd */ + /* increase step to next candidate */ step += kstep; /* compute the new residue without using division */ diff --git a/bn_mp_sub_d.c b/bn_mp_sub_d.c index 7ca565e..2fd8bdb 100644 --- a/bn_mp_sub_d.c +++ b/bn_mp_sub_d.c @@ -18,16 +18,61 @@ int mp_sub_d (mp_int * a, mp_digit b, mp_int * c) { - mp_int t; - int res; + mp_digit *tmpa, *tmpc, mu; + int res, ix, oldused; - - if ((res = mp_init (&t)) != MP_OKAY) { - return res; + /* grow c as required */ + if (c->alloc < a->used + 1) { + if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { + return res; + } } - mp_set (&t, b); - res = mp_sub (a, &t, c); - mp_clear (&t); - return res; + /* if a is negative just do an unsigned + * addition [with fudged signs] + */ + if (a->sign == MP_NEG) { + a->sign = MP_ZPOS; + res = mp_add_d(a, b, c); + a->sign = c->sign = MP_NEG; + return res; + } + + /* setup regs */ + oldused = c->used; + tmpa = a->dp; + tmpc = c->dp; + + /* if a <= b simply fix the single digit */ + if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) { + *tmpc++ = b - *tmpa; + ix = 1; + + /* negative/1digit */ + c->sign = MP_NEG; + c->used = 1; + } else { + /* positive/size */ + c->sign = MP_ZPOS; + c->used = a->used; + + /* subtract first digit */ + *tmpc = *tmpa++ - b; + mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); + *tmpc++ &= MP_MASK; + + /* handle rest of the digits */ + for (ix = 1; ix < a->used; ix++) { + *tmpc = *tmpa++ - mu; + mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); + *tmpc++ &= MP_MASK; + } + } + + for (; ix < oldused; ix++) { + *tmpc++ = 0; + } + mp_clamp(c); + return MP_OKAY; } + diff --git a/changes.txt b/changes.txt index be4869f..8508424 100644 --- a/changes.txt +++ b/changes.txt @@ -1,3 +1,12 @@ +July 15th, 2003 +v0.24 -- Optimized mp_add_d and mp_sub_d to not allocate temporary variables + -- Fixed mp_gcd() so the gcd of 0,0 is 0. Allows the gcd operation to be chained + e.g. (0,0,a) == a [instead of 1] + -- Should be one of the last release for a while. Working on LibTomMath book now. + -- optimized the pprime demo [/etc/pprime.c] to first make a huge table of single + digit primes then it reads them randomly instead of randomly choosing/testing single + digit primes. + July 12th, 2003 v0.23 -- Optimized mp_prime_next_prime() to not use mp_mod [via is_divisible()] in each iteration. Instead now a smaller table is kept of the residues which can be updated diff --git a/demo/demo.c b/demo/demo.c index e7b3fdb..0691efb 100644 --- a/demo/demo.c +++ b/demo/demo.c @@ -46,7 +46,7 @@ int main(void) { mp_int a, b, c, d, e, f; unsigned long expt_n, add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, inv_n, - div2_n, mul2_n; + div2_n, mul2_n, add_d_n, sub_d_n; unsigned rr; int cnt, ix, old_kara_m, old_kara_s; @@ -190,7 +190,7 @@ int main(void) mp_rand(&b, cnt); reset(); rr = 0; - do { + do { DO(mp_add(&a,&b,&c)); rr += 16; } while (rdtsc() < (CLOCKS_PER_SEC * 2)); @@ -355,7 +355,7 @@ int main(void) #endif div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n = - sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = 0; + sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = add_d_n = sub_d_n= 0; /* force KARA and TOOM to enable despite cutoffs */ KARATSUBA_SQR_CUTOFF = KARATSUBA_MUL_CUTOFF = 110; @@ -374,7 +374,7 @@ int main(void) } - printf("%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu ", add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, expt_n, inv_n, div2_n, mul2_n); + printf("%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu ", add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, expt_n, inv_n, div2_n, mul2_n, add_d_n, sub_d_n); fgets(cmd, 4095, stdin); cmd[strlen(cmd)-1] = 0; printf("%s ]\r",cmd); fflush(stdout); @@ -559,8 +559,33 @@ draw(&a);draw(&b);draw(&c);draw(&d); draw(&c); return 0; } + } else if (!strcmp(cmd, "add_d")) { ++add_d_n; + fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 64); + fgets(buf, 4095, stdin); sscanf(buf, "%d", &ix); + fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 64); + mp_add_d(&a, ix, &c); + if (mp_cmp(&b, &c) != MP_EQ) { + printf("add_d %lu failure\n", add_d_n); + draw(&a); + draw(&b); + draw(&c); + printf("d == %d\n", ix); + return 0; + } + } else if (!strcmp(cmd, "sub_d")) { ++sub_d_n; + fgets(buf, 4095, stdin); mp_read_radix(&a, buf, 64); + fgets(buf, 4095, stdin); sscanf(buf, "%d", &ix); + fgets(buf, 4095, stdin); mp_read_radix(&b, buf, 64); + mp_sub_d(&a, ix, &c); + if (mp_cmp(&b, &c) != MP_EQ) { + printf("sub_d %lu failure\n", sub_d_n); + draw(&a); + draw(&b); + draw(&c); + printf("d == %d\n", ix); + return 0; + } } - } return 0; } diff --git a/etc/makefile b/etc/makefile index eb732e3..84d1aa3 100644 --- a/etc/makefile +++ b/etc/makefile @@ -41,4 +41,4 @@ mont: mont.o clean: - rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime \ No newline at end of file + rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime pprime.dat diff --git a/etc/pprime.c b/etc/pprime.c index 6285924..9360a09 100644 --- a/etc/pprime.c +++ b/etc/pprime.c @@ -7,6 +7,9 @@ #include #include "tommath.h" +int n_prime; +FILE *primes; + /* fast square root */ static mp_digit i_sqrt (mp_word x) @@ -28,30 +31,35 @@ i_sqrt (mp_word x) /* generates a prime digit */ -static mp_digit -prime_digit () +static void gen_prime (void) { mp_digit r, x, y, next; + FILE *out; - /* make a DIGIT_BIT-bit random number */ - for (r = x = 0; x < DIGIT_BIT; x++) { - r = (r << 1) | (rand () & 1); - } + out = fopen("pprime.dat", "wb"); - /* now force it odd */ - r |= 1; - - /* force it to be >30 */ - if (r < 30) { - r += 30; - } + /* write first set of primes */ + r = 3; fwrite(&r, 1, sizeof(mp_digit), out); + r = 5; fwrite(&r, 1, sizeof(mp_digit), out); + r = 7; fwrite(&r, 1, sizeof(mp_digit), out); + r = 11; fwrite(&r, 1, sizeof(mp_digit), out); + r = 13; fwrite(&r, 1, sizeof(mp_digit), out); + r = 17; fwrite(&r, 1, sizeof(mp_digit), out); + r = 19; fwrite(&r, 1, sizeof(mp_digit), out); + r = 23; fwrite(&r, 1, sizeof(mp_digit), out); + r = 29; fwrite(&r, 1, sizeof(mp_digit), out); + r = 31; fwrite(&r, 1, sizeof(mp_digit), out); /* get square root, since if 'r' is composite its factors must be < than this */ y = i_sqrt (r); next = (y + 1) * (y + 1); + for (;;) { do { r += 2; /* next candidate */ + r &= MP_MASK; + if (r < 31) break; + if (!((r>>1)&65535)) sleep(1); /* update sqrt ? */ if (next <= r) { @@ -133,10 +141,36 @@ prime_digit () } } } while (x == 0); + if (r > 31) { fwrite(&r, 1, sizeof(mp_digit), out); printf("%9d\r", r); fflush(stdout); } + if (r < 31) break; + } - return r; + fclose(out); } +void load_tab(void) +{ + primes = fopen("pprime.dat", "rb"); + if (primes == NULL) { + gen_prime(); + primes = fopen("pprime.dat", "rb"); + } + fseek(primes, 0, SEEK_END); + n_prime = ftell(primes) / sizeof(mp_digit); +} + +mp_digit prime_digit(void) +{ + int n; + mp_digit d; + + n = abs(rand()) % n_prime; + fseek(primes, n * sizeof(mp_digit), SEEK_SET); + fread(&d, 1, sizeof(mp_digit), primes); + return d; +} + + /* makes a prime of at least k bits */ int pprime (int k, int li, mp_int * p, mp_int * q) @@ -292,7 +326,7 @@ pprime (int k, int li, mp_int * p, mp_int * q) /* { char buf[4096]; - + mp_toradix(&n, buf, 10); printf("Certificate of primality for:\n%s\n\n", buf); mp_toradix(&a, buf, 10); @@ -300,8 +334,9 @@ pprime (int k, int li, mp_int * p, mp_int * q) mp_toradix(&b, buf, 10); printf("B == \n%s\n", buf); printf("----------------------------------------------------------------\n"); -} +} */ + /* a = n */ mp_copy (&n, &a); } @@ -335,6 +370,7 @@ main (void) clock_t t1; srand (time (NULL)); + load_tab(); printf ("Enter # of bits: \n"); fgets (buf, sizeof (buf), stdin); diff --git a/makefile b/makefile index 5ff6957..c939c8b 100644 --- a/makefile +++ b/makefile @@ -1,6 +1,9 @@ +#Makefile for GCC +# +#Tom St Denis CFLAGS += -I./ -Wall -W -Wshadow -O3 -fomit-frame-pointer -funroll-loops -VERSION=0.23 +VERSION=0.24 default: libtommath.a @@ -69,7 +72,7 @@ docdvi: tommath.src poster: poster.tex pdflatex poster rm -f poster.aux poster.log - + # makes the LTM book PS/PDF file, requires tetex, cleans up the LaTeX temp files docs: cd pics ; make pdfes @@ -91,7 +94,7 @@ docs: manual: latex bn pdflatex bn - rm -f bn.aux bn.dvi bn.log + rm -f bn.aux bn.dvi bn.log clean: rm -f *.pdf *.o *.a *.obj *.lib *.exe etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \ diff --git a/mtest/mtest.c b/mtest/mtest.c index 2c111c5..33ff215 100644 --- a/mtest/mtest.c +++ b/mtest/mtest.c @@ -28,6 +28,12 @@ mulmod */ +#ifdef MP_8BIT +#define THE_MASK 127 +#else +#define THE_MASK 32767 +#endif + #include #include #include @@ -63,7 +69,7 @@ void rand_num2(mp_int *a) int main(void) { - int n; + int n, tmp; mp_int a, b, c, d, e; clock_t t1; char buf[4096]; @@ -108,7 +114,7 @@ int main(void) t1 = clock(); } - n = fgetc(rng) % 13; + n = fgetc(rng) % 15; if (n == 0) { /* add tests */ @@ -272,6 +278,24 @@ int main(void) printf("%s\n", buf); mp_to64(&b, buf); printf("%s\n", buf); + } else if (n == 13) { + rand_num2(&a); + tmp = abs(rand()) & THE_MASK; + mp_add_d(&a, tmp, &b); + printf("add_d\n"); + mp_to64(&a, buf); + printf("%s\n%d\n", buf, tmp); + mp_to64(&b, buf); + printf("%s\n", buf); + } else if (n == 14) { + rand_num2(&a); + tmp = abs(rand()) & THE_MASK; + mp_sub_d(&a, tmp, &b); + printf("sub_d\n"); + mp_to64(&a, buf); + printf("%s\n%d\n", buf, tmp); + mp_to64(&b, buf); + printf("%s\n", buf); } } fclose(rng); diff --git a/poster.pdf b/poster.pdf index 3d3377d..1b6ce6e 100644 Binary files a/poster.pdf and b/poster.pdf differ diff --git a/pre_gen/mpi.c b/pre_gen/mpi.c index 5c09fbf..2f7a853 100644 --- a/pre_gen/mpi.c +++ b/pre_gen/mpi.c @@ -813,19 +813,88 @@ mp_add (mp_int * a, mp_int * b, mp_int * c) int mp_add_d (mp_int * a, mp_digit b, mp_int * c) { - mp_int t; - int res; + int res, ix, oldused; + mp_digit *tmpa, *tmpc, mu; - if ((res = mp_init_size(&t, 1)) != MP_OKAY) { - return res; + /* grow c as required */ + if (c->alloc < a->used + 1) { + if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { + return res; + } } - mp_set (&t, b); - res = mp_add (a, &t, c); - mp_clear (&t); - return res; + /* if a is negative and |a| >= b, call c = |a| - b */ + if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) { + /* temporarily fix sign of a */ + a->sign = MP_ZPOS; + + /* c = |a| - b */ + res = mp_sub_d(a, b, c); + + /* fix sign */ + a->sign = c->sign = MP_NEG; + + return res; + } + + + /* old number of used digits in c */ + oldused = c->used; + + /* sign always positive */ + c->sign = MP_ZPOS; + + /* source alias */ + tmpa = a->dp; + + /* destination alias */ + tmpc = c->dp; + + /* if a is positive */ + if (a->sign == MP_ZPOS) { + /* setup size */ + c->used = a->used + 1; + + /* add digit, after this we're propagating + * the carry. + */ + *tmpc = *tmpa++ + b; + mu = *tmpc >> DIGIT_BIT; + *tmpc++ &= MP_MASK; + + /* now handle rest of the digits */ + for (ix = 1; ix < a->used; ix++) { + *tmpc = *tmpa++ + mu; + mu = *tmpc >> DIGIT_BIT; + *tmpc++ &= MP_MASK; + } + /* set final carry */ + ix++; + *tmpc++ = mu; + + } else { + /* a was negative and |a| < b */ + c->used = 1; + + /* the result is a single digit */ + *tmpc++ = b - a->dp[0]; + + /* setup count so the clearing of oldused + * can fall through correctly + */ + ix = 1; + } + + /* now zero to oldused */ + while (ix++ < oldused) { + *tmpc++ = 0; + } + mp_clamp(c); + + return MP_OKAY; } + /* End: bn_mp_add_d.c */ /* Start: bn_mp_addmod.c */ @@ -2577,7 +2646,7 @@ mp_gcd (mp_int * a, mp_int * b, mp_int * c) return mp_copy (a, c); } if (mp_iszero (a) == 1 && mp_iszero (b) == 1) { - mp_set (c, 1); + mp_zero(c); return MP_OKAY; } @@ -4674,7 +4743,7 @@ mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result) */ s = mp_cnt_lsb(&r); - /* now divide n - 1 by 2^s */ + /* now divide n - 1 by 2**s */ if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) { goto __R; } @@ -4835,7 +4904,7 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style) /* y == 1 if any residue was zero [e.g. cannot be prime] */ y = 0; - /* increase step to next odd */ + /* increase step to next candidate */ step += kstep; /* compute the new residue without using division */ @@ -5812,20 +5881,65 @@ mp_sub (mp_int * a, mp_int * b, mp_int * c) int mp_sub_d (mp_int * a, mp_digit b, mp_int * c) { - mp_int t; - int res; + mp_digit *tmpa, *tmpc, mu; + int res, ix, oldused; - - if ((res = mp_init (&t)) != MP_OKAY) { - return res; + /* grow c as required */ + if (c->alloc < a->used + 1) { + if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { + return res; + } } - mp_set (&t, b); - res = mp_sub (a, &t, c); - mp_clear (&t); - return res; + /* if a is negative just do an unsigned + * addition [with fudged signs] + */ + if (a->sign == MP_NEG) { + a->sign = MP_ZPOS; + res = mp_add_d(a, b, c); + a->sign = c->sign = MP_NEG; + return res; + } + + /* setup regs */ + oldused = c->used; + tmpa = a->dp; + tmpc = c->dp; + + /* if a <= b simply fix the single digit */ + if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) { + *tmpc++ = b - *tmpa; + ix = 1; + + /* negative/1digit */ + c->sign = MP_NEG; + c->used = 1; + } else { + /* positive/size */ + c->sign = MP_ZPOS; + c->used = a->used; + + /* subtract first digit */ + *tmpc = *tmpa++ - b; + mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); + *tmpc++ &= MP_MASK; + + /* handle rest of the digits */ + for (ix = 1; ix < a->used; ix++) { + *tmpc = *tmpa++ - mu; + mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); + *tmpc++ &= MP_MASK; + } + } + + for (; ix < oldused; ix++) { + *tmpc++ = 0; + } + mp_clamp(c); + return MP_OKAY; } + /* End: bn_mp_sub_d.c */ /* Start: bn_mp_submod.c */