diff --git a/bn.pdf b/bn.pdf index dbaaf90..0fe6beb 100644 Binary files a/bn.pdf and b/bn.pdf differ diff --git a/bn.tex b/bn.tex index 06f7b1e..2fc284d 100644 --- a/bn.tex +++ b/bn.tex @@ -1,7 +1,7 @@ \documentclass[]{article} \begin{document} -\title{LibTomMath v0.26 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org } +\title{LibTomMath v0.27 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org } \author{Tom St Denis \\ tomstdenis@iahu.ca} \maketitle \newpage diff --git a/bn_mp_add_d.c b/bn_mp_add_d.c index 2d13d48..edc93c1 100644 --- a/bn_mp_add_d.c +++ b/bn_mp_add_d.c @@ -56,9 +56,6 @@ mp_add_d (mp_int * a, mp_digit b, mp_int * c) /* if a is positive */ if (a->sign == MP_ZPOS) { - /* setup size */ - c->used = a->used + 1; - /* add digit, after this we're propagating * the carry. */ @@ -75,6 +72,9 @@ mp_add_d (mp_int * a, mp_digit b, mp_int * c) /* set final carry */ ix++; *tmpc++ = mu; + + /* setup size */ + c->used = a->used + 1; } else { /* a was negative and |a| < b */ c->used = 1; diff --git a/bn_mp_dr_reduce.c b/bn_mp_dr_reduce.c index 2fe9dbe..bad240a 100644 --- a/bn_mp_dr_reduce.c +++ b/bn_mp_dr_reduce.c @@ -26,7 +26,7 @@ * * Has been modified to use algorithm 7.10 from the LTM book instead * - * Input x must be in the range 0 <= x <= (n-1)^2 + * Input x must be in the range 0 <= x <= (n-1)**2 */ int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k) @@ -34,10 +34,10 @@ mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k) int err, i, m; mp_word r; mp_digit mu, *tmpx1, *tmpx2; - + /* m = digits in modulus */ m = n->used; - + /* ensure that "x" has at least 2m digits */ if (x->alloc < m + m) { if ((err = mp_grow (x, m + m)) != MP_OKAY) { @@ -45,20 +45,20 @@ mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k) } } -/* top of loop, this is where the code resumes if +/* top of loop, this is where the code resumes if * another reduction pass is required. */ top: /* aliases for digits */ /* alias for lower half of x */ tmpx1 = x->dp; - + /* alias for upper half of x, or x/B**m */ tmpx2 = x->dp + m; - + /* set carry to zero */ mu = 0; - + /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ for (i = 0; i < m; i++) { r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu; @@ -77,7 +77,7 @@ top: /* clamp, sub and return */ mp_clamp (x); - /* if x >= n then subtract and reduce again + /* if x >= n then subtract and reduce again * Each successive "recursion" makes the input smaller and smaller. */ if (mp_cmp_mag (x, n) != MP_LT) { diff --git a/bn_mp_exptmod_fast.c b/bn_mp_exptmod_fast.c index 1fe9df6..c281733 100644 --- a/bn_mp_exptmod_fast.c +++ b/bn_mp_exptmod_fast.c @@ -14,7 +14,7 @@ */ #include -/* computes Y == G^X mod P, HAC pp.616, Algorithm 14.85 +/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 * * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. * The value of k changes based on the size of the exponent. @@ -34,10 +34,10 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) mp_int M[TAB_SIZE], res; mp_digit buf, mp; int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; - + /* use a pointer to the reduction algorithm. This allows us to use * one of many reduction algorithms without modding the guts of - * the code with if statements everywhere. + * the code with if statements everywhere. */ int (*redux)(mp_int*,mp_int*,mp_digit); @@ -68,7 +68,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) /* init M array */ /* init first cell */ if ((err = mp_init(&M[1])) != MP_OKAY) { - return err; + return err; } /* now init the second half of the array */ @@ -88,7 +88,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) { goto __M; } - + /* automatically pick the comba one if available (saves quite a few calls/ifs) */ if (((P->used * 2 + 1) < MP_WARRAY) && P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { diff --git a/bn_mp_grow.c b/bn_mp_grow.c index e6ae02f..c2c47a5 100644 --- a/bn_mp_grow.c +++ b/bn_mp_grow.c @@ -19,17 +19,29 @@ int mp_grow (mp_int * a, int size) { int i; + mp_digit *tmp; + /* if the alloc size is smaller alloc more ram */ if (a->alloc < size) { /* ensure there are always at least MP_PREC digits extra on top */ - size += (MP_PREC * 2) - (size % MP_PREC); + size += (MP_PREC * 2) - (size % MP_PREC); - a->dp = OPT_CAST realloc (a->dp, sizeof (mp_digit) * size); - if (a->dp == NULL) { + /* reallocate the array a->dp + * + * We store the return in a temporary variable + * in case the operation failed we don't want + * to overwrite the dp member of a. + */ + tmp = OPT_CAST realloc (a->dp, sizeof (mp_digit) * size); + if (tmp == NULL) { + /* reallocation failed but "a" is still valid [can be freed] */ return MP_MEM; } + /* reallocation succeeded so set a->dp */ + a->dp = tmp; + /* zero excess digits */ i = a->alloc; a->alloc = size; diff --git a/bn_mp_mod_2d.c b/bn_mp_mod_2d.c index c25212d..89e9081 100644 --- a/bn_mp_mod_2d.c +++ b/bn_mp_mod_2d.c @@ -14,7 +14,7 @@ */ #include -/* calc a value mod 2^b */ +/* calc a value mod 2**b */ int mp_mod_2d (mp_int * a, int b, mp_int * c) { diff --git a/bn_mp_mul_d.c b/bn_mp_mul_d.c index ae665bc..658fe01 100644 --- a/bn_mp_mul_d.c +++ b/bn_mp_mul_d.c @@ -18,12 +18,13 @@ int mp_mul_d (mp_int * a, mp_digit b, mp_int * c) { - int res, pa, olduse; + mp_digit u, *tmpa, *tmpc; + mp_word r; + int ix, res, olduse; /* make sure c is big enough to hold a*b */ - pa = a->used; - if (c->alloc < pa + 1) { - if ((res = mp_grow (c, pa + 1)) != MP_OKAY) { + if (c->alloc < a->used + 1) { + if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) { return res; } } @@ -31,42 +32,41 @@ mp_mul_d (mp_int * a, mp_digit b, mp_int * c) /* get the original destinations used count */ olduse = c->used; - /* set the new temporary used count */ - c->used = pa + 1; + /* set the sign */ c->sign = a->sign; - { - register mp_digit u, *tmpa, *tmpc; - register mp_word r; - register int ix; + /* alias for a->dp [source] */ + tmpa = a->dp; - /* alias for a->dp [source] */ - tmpa = a->dp; + /* alias for c->dp [dest] */ + tmpc = c->dp; - /* alias for c->dp [dest] */ - tmpc = c->dp; + /* zero carry */ + u = 0; - /* zero carry */ - u = 0; - for (ix = 0; ix < pa; ix++) { - /* compute product and carry sum for this term */ - r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b); + /* compute columns */ + for (ix = 0; ix < a->used; ix++) { + /* compute product and carry sum for this term */ + r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b); - /* mask off higher bits to get a single digit */ - *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); + /* mask off higher bits to get a single digit */ + *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); - /* send carry into next iteration */ - u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); - } - /* store final carry [if any] */ - *tmpc++ = u; - - /* now zero digits above the top */ - for (; pa < olduse; pa++) { - *tmpc++ = 0; - } + /* send carry into next iteration */ + u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); } - mp_clamp (c); + /* store final carry [if any] */ + *tmpc++ = u; + + /* now zero digits above the top */ + while (ix++ < olduse) { + *tmpc++ = 0; + } + + /* set used count */ + c->used = a->used + 1; + mp_clamp(c); + return MP_OKAY; } diff --git a/bn_mp_sub_d.c b/bn_mp_sub_d.c index d9938f4..6368970 100644 --- a/bn_mp_sub_d.c +++ b/bn_mp_sub_d.c @@ -73,7 +73,8 @@ mp_sub_d (mp_int * a, mp_digit b, mp_int * c) } } - for (; ix < oldused; ix++) { + /* zero excess digits */ + while (ix++ < oldused) { *tmpc++ = 0; } mp_clamp(c); diff --git a/bn_mp_toom_sqr.c b/bn_mp_toom_sqr.c index 7db1592..de3094a 100644 --- a/bn_mp_toom_sqr.c +++ b/bn_mp_toom_sqr.c @@ -15,12 +15,12 @@ #include /* squaring using Toom-Cook 3-way algorithm */ -int +int mp_toom_sqr(mp_int *a, mp_int *b) { mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2; int res, B; - + /* init temps */ if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) { return res; @@ -28,8 +28,8 @@ mp_toom_sqr(mp_int *a, mp_int *b) /* B */ B = a->used / 3; - - /* a = a2 * B^2 + a1 * B + a0 */ + + /* a = a2 * B**2 + a1 * B + a0 */ if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { goto ERR; } @@ -44,17 +44,17 @@ mp_toom_sqr(mp_int *a, mp_int *b) goto ERR; } mp_rshd(&a2, B*2); - + /* w0 = a0*a0 */ if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) { goto ERR; } - + /* w4 = a2 * a2 */ if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) { goto ERR; } - + /* w1 = (a2 + 2(a1 + 2a0))**2 */ if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { goto ERR; @@ -68,11 +68,11 @@ mp_toom_sqr(mp_int *a, mp_int *b) if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { goto ERR; } - + if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) { goto ERR; } - + /* w3 = (a0 + 2(a1 + 2a2))**2 */ if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { goto ERR; @@ -86,11 +86,11 @@ mp_toom_sqr(mp_int *a, mp_int *b) if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { goto ERR; } - + if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) { goto ERR; } - + /* w2 = (a2 + a1 + a0)**2 */ if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { @@ -102,18 +102,18 @@ mp_toom_sqr(mp_int *a, mp_int *b) if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) { goto ERR; } - - /* now solve the matrix - + + /* now solve the matrix + 0 0 0 0 1 1 2 4 8 16 1 1 1 1 1 16 8 4 2 1 1 0 0 0 0 - + using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication. */ - + /* r1 - r4 */ if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { goto ERR; @@ -185,7 +185,7 @@ mp_toom_sqr(mp_int *a, mp_int *b) if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { goto ERR; } - + /* at this point shift W[n] by B*n */ if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { goto ERR; @@ -198,8 +198,8 @@ mp_toom_sqr(mp_int *a, mp_int *b) } if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { goto ERR; - } - + } + if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) { goto ERR; } @@ -211,10 +211,10 @@ mp_toom_sqr(mp_int *a, mp_int *b) } if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) { goto ERR; - } - + } + ERR: mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL); return res; -} - +} + diff --git a/changes.txt b/changes.txt index 0772f45..b6c6fad 100644 --- a/changes.txt +++ b/changes.txt @@ -1,3 +1,12 @@ +Sept 19th, 2003 +v0.27 -- Removed changes.txt~ which was made by accident since "kate" decided it was + a good time to re-enable backups... [kde is fun!] + -- In mp_grow() "a->dp" is not overwritten by realloc call [re: memory leak] + Now if mp_grow() fails the mp_int is still valid and can be cleared via + mp_clear() to reclaim the memory. + -- Henrik Goldman found a buffer overflow bug in mp_add_d(). Fixed. + -- Cleaned up mp_mul_d() to be much easier to read and follow. + Aug 29th, 2003 v0.26 -- Fixed typo that caused warning with GCC 3.2 -- Martin Marcel noticed a bug in mp_neg() that allowed negative zeroes. diff --git a/changes.txt~ b/changes.txt~ deleted file mode 100644 index 2ccb559..0000000 --- a/changes.txt~ +++ /dev/null @@ -1,244 +0,0 @@ -Aug 11th, 2003 -v0.26 -- Fixed typo that caused warning with GCC 3.2 - -- Martin Marcel noticed a bug in mp_neg() that allowed negative zeroes. - Also, Martin is the fellow who noted the bugs in mp_gcd() of 0.24/0.25. - -- Martin Marcel noticed an optimization [and slight bug] in mp_lcm(). - -- Added fix to mp_read_unsigned_bin to prevent a buffer overflow. - -- Beefed up the comments in the baseline multipliers [and montgomery] - -- Added "mont" demo to the makefile.msvc in etc/ - -- Optimized sign compares in mp_cmp from 4 to 2 cases. - -Aug 4th, 2003 -v0.25 -- Fix to mp_gcd again... oops (0,-a) == (-a, 0) == a - -- Fix to mp_clear which didn't reset the sign [Greg Rose] - -- Added mp_error_to_string() to convert return codes to strings. [Greg Rose] - -- Optimized fast_mp_invmod() to do the test for invalid inputs [both even] - first so temps don't have to be initialized if it's going to fail. - -- Optimized mp_gcd() by removing mp_div_2d calls for when one of the inputs - is odd. - -- Tons of new comments, some indentation fixups, etc. - -- mp_jacobi() returns MP_VAL if the modulus is less than or equal to zero. - -- fixed two typos in the header of each file :-) - -- LibTomMath is officially Public Domain [see LICENSE] - -July 15th, 2003 -v0.24 -- Optimized mp_add_d and mp_sub_d to not allocate temporary variables - -- Fixed mp_gcd() so the gcd of 0,0 is 0. Allows the gcd operation to be chained - e.g. (0,0,a) == a [instead of 1] - -- Should be one of the last release for a while. Working on LibTomMath book now. - -- optimized the pprime demo [/etc/pprime.c] to first make a huge table of single - digit primes then it reads them randomly instead of randomly choosing/testing single - digit primes. - -July 12th, 2003 -v0.23 -- Optimized mp_prime_next_prime() to not use mp_mod [via is_divisible()] in each - iteration. Instead now a smaller table is kept of the residues which can be updated - without division. - -- Fixed a bug in next_prime() where an input of zero would be treated as odd and - have two added to it [to move to the next odd]. - -- fixed a bug in prime_fermat() and prime_miller_rabin() which allowed the base - to be negative, zero or one. Normally the test is only valid if the base is - greater than one. - -- changed the next_prime() prototype to accept a new parameter "bbs_style" which - will find the next prime congruent to 3 mod 4. The default [bbs_style==0] will - make primes which are either congruent to 1 or 3 mod 4. - -- fixed mp_read_unsigned_bin() so that it doesn't include both code for - the case DIGIT_BIT < 8 and >= 8 - -- optimized div_d() to easy out on division by 1 [or if a == 0] and use - logical shifts if the divisor is a power of two. - -- the default DIGIT_BIT type was not int for non-default builds. Fixed. - -July 2nd, 2003 -v0.22 -- Fixed up mp_invmod so the result is properly in range now [was always congruent to the inverse...] - -- Fixed up s_mp_exptmod and mp_exptmod_fast so the lower half of the pre-computed table isn't allocated - which makes the algorithm use half as much ram. - -- Fixed the install script not to make the book :-) [which isn't included anyways] - -- added mp_cnt_lsb() which counts how many of the lsbs are zero - -- optimized mp_gcd() to use the new mp_cnt_lsb() to replace multiple divisions by two by a single division. - -- applied similar optimization to mp_prime_miller_rabin(). - -- Fixed a bug in both mp_invmod() and fast_mp_invmod() which tested for odd - via "mp_iseven() == 0" which is not valid [since zero is not even either]. - -June 19th, 2003 -v0.21 -- Fixed bug in mp_mul_d which would not handle sign correctly [would not always forward it] - -- Removed the #line lines from gen.pl [was in violation of ISO C] - -June 8th, 2003 -v0.20 -- Removed the book from the package. Added the TDCAL license document. - -- This release is officially pure-bred TDCAL again [last officially TDCAL based release was v0.16] - -June 6th, 2003 -v0.19 -- Fixed a bug in mp_montgomery_reduce() which was introduced when I tweaked mp_rshd() in the previous release. - Essentially the digits were not trimmed before the compare which cause a subtraction to occur all the time. - -- Fixed up etc/tune.c a bit to stop testing new cutoffs after 16 failures [to find more optimal points]. - Brute force ho! - - -May 29th, 2003 -v0.18 -- Fixed a bug in s_mp_sqr which would handle carries properly just not very elegantly. - (e.g. correct result, just bad looking code) - -- Fixed bug in mp_sqr which still had a 512 constant instead of MP_WARRAY - -- Added Toom-Cook multipliers [needs tuning!] - -- Added efficient divide by 3 algorithm mp_div_3 - -- Re-wrote mp_div_d to be faster than calling mp_div - -- Added in a donated BCC makefile and a single page LTM poster (ahalhabsi@sbcglobal.net) - -- Added mp_reduce_2k which reduces an input modulo n = 2**p - k for any single digit k - -- Made the exptmod system be aware of the 2k reduction algorithms. - -- Rewrote mp_dr_reduce to be smaller, simpler and easier to understand. - -May 17th, 2003 -v0.17 -- Benjamin Goldberg submitted optimized mp_add and mp_sub routines. A new gen.pl as well - as several smaller suggestions. Thanks! - -- removed call to mp_cmp in inner loop of mp_div and put mp_cmp_mag in its place :-) - -- Fixed bug in mp_exptmod that would cause it to fail for odd moduli when DIGIT_BIT != 28 - -- mp_exptmod now also returns errors if the modulus is negative and will handle negative exponents - -- mp_prime_is_prime will now return true if the input is one of the primes in the prime table - -- Damian M Gryski (dgryski@uwaterloo.ca) found a index out of bounds error in the - mp_fast_s_mp_mul_high_digs function which didn't come up before. (fixed) - -- Refactored the DR reduction code so there is only one function per file. - -- Fixed bug in the mp_mul() which would erroneously avoid the faster multiplier [comba] when it was - allowed. The bug would not cause the incorrect value to be produced just less efficient (fixed) - -- Fixed similar bug in the Montgomery reduction code. - -- Added tons of (mp_digit) casts so the 7/15/28/31 bit digit code will work flawlessly out of the box. - Also added limited support for 64-bit machines with a 60-bit digit. Both thanks to Tom Wu (tom@arcot.com) - -- Added new comments here and there, cleaned up some code [style stuff] - -- Fixed a lingering typo in mp_exptmod* that would set bitcnt to zero then one. Very silly stuff :-) - -- Fixed up mp_exptmod_fast so it would set "redux" to the comba Montgomery reduction if allowed. This - saves quite a few calls and if statements. - -- Added etc/mont.c a test of the Montgomery reduction [assuming all else works :-| ] - -- Fixed up etc/tune.c to use a wider test range [more appropriate] also added a x86 based addition which - uses RDTSC for high precision timing. - -- Updated demo/demo.c to remove MPI stuff [won't work anyways], made the tests run for 2 seconds each so its - not so insanely slow. Also made the output space delimited [and fixed up various errors] - -- Added logs directory, logs/graph.dem which will use gnuplot to make a series of PNG files - that go with the pre-made index.html. You have to build [via make timing] and run ltmtest first in the - root of the package. - -- Fixed a bug in mp_sub and mp_add where "-a - -a" or "-a + a" would produce -0 as the result [obviously invalid]. - -- Fixed a bug in mp_rshd. If the count == a.used it should zero/return [instead of shifting] - -- Fixed a "off-by-one" bug in mp_mul2d. The initial size check on alloc would be off by one if the residue - shifting caused a carry. - -- Fixed a bug where s_mp_mul_digs() would not call the Comba based routine if allowed. This made Barrett reduction - slower than it had to be. - -Mar 29th, 2003 -v0.16 -- Sped up mp_div by making normalization one shift call - -- Sped up mp_mul_2d/mp_div_2d by aliasing pointers :-) - -- Cleaned up mp_gcd to use the macros for odd/even detection - -- Added comments here and there, mostly there but occasionally here too. - -Mar 22nd, 2003 -v0.15 -- Added series of prime testing routines to lib - -- Fixed up etc/tune.c - -- Added DR reduction algorithm - -- Beefed up the manual more. - -- Fixed up demo/demo.c so it doesn't have so many warnings and it does the full series of - tests - -- Added "pre-gen" directory which will hold a "gen.pl"'ed copy of the entire lib [done at - zipup time so its always the latest] - -- Added conditional casts for C++ users [boo!] - -Mar 15th, 2003 -v0.14 -- Tons of manual updates - -- cleaned up the directory - -- added MSVC makefiles - -- source changes [that I don't recall] - -- Fixed up the lshd/rshd code to use pointer aliasing - -- Fixed up the mul_2d and div_2d to not call rshd/lshd unless needed - -- Fixed up etc/tune.c a tad - -- fixed up demo/demo.c to output comma-delimited results of timing - also fixed up timing demo to use a finer granularity for various functions - -- fixed up demo/demo.c testing to pause during testing so my Duron won't catch on fire - [stays around 31-35C during testing :-)] - -Feb 13th, 2003 -v0.13 -- tons of minor speed-ups in low level add, sub, mul_2 and div_2 which propagate - to other functions like mp_invmod, mp_div, etc... - -- Sped up mp_exptmod_fast by using new code to find R mod m [e.g. B^n mod m] - -- minor fixes - -Jan 17th, 2003 -v0.12 -- re-wrote the majority of the makefile so its more portable and will - install via "make install" on most *nix platforms - -- Re-packaged all the source as seperate files. Means the library a single - file packagage any more. Instead of just adding "bn.c" you have to add - libtommath.a - -- Renamed "bn.h" to "tommath.h" - -- Changes to the manual to reflect all of this - -- Used GNU Indent to clean up the source - -Jan 15th, 2003 -v0.11 -- More subtle fixes - -- Moved to gentoo linux [hurrah!] so made *nix specific fixes to the make process - -- Sped up the montgomery reduction code quite a bit - -- fixed up demo so when building timing for the x86 it assumes ELF format now - -Jan 9th, 2003 -v0.10 -- Pekka Riikonen suggested fixes to the radix conversion code. - -- Added baseline montgomery and comba montgomery reductions, sped up exptmods - [to a point, see bn.h for MONTGOMERY_EXPT_CUTOFF] - -Jan 6th, 2003 -v0.09 -- Updated the manual to reflect recent changes. :-) - -- Added Jacobi function (mp_jacobi) to supplement the number theory side of the lib - -- Added a Mersenne prime finder demo in ./etc/mersenne.c - -Jan 2nd, 2003 -v0.08 -- Sped up the multipliers by moving the inner loop variables into a smaller scope - -- Corrected a bunch of small "warnings" - -- Added more comments - -- Made "mtest" be able to use /dev/random, /dev/urandom or stdin for RNG data - -- Corrected some bugs where error messages were potentially ignored - -- add etc/pprime.c program which makes numbers which are provably prime. - -Jan 1st, 2003 -v0.07 -- Removed alot of heap operations from core functions to speed them up - -- Added a root finding function [and mp_sqrt macro like from MPI] - -- Added more to manual - -Dec 31st, 2002 -v0.06 -- Sped up the s_mp_add, s_mp_sub which inturn sped up mp_invmod, mp_exptmod, etc... - -- Cleaned up the header a bit more - -Dec 30th, 2002 -v0.05 -- Builds with MSVC out of the box - -- Fixed a bug in mp_invmod w.r.t. even moduli - -- Made mp_toradix and mp_read_radix use char instead of unsigned char arrays - -- Fixed up exptmod to use fewer multiplications - -- Fixed up mp_init_size to use only one heap operation - -- Note there is a slight "off-by-one" bug in the library somewhere - without the padding (see the source for comment) the library - crashes in libtomcrypt. Anyways a reasonable workaround is to pad the - numbers which will always correct it since as the numbers grow the padding - will still be beyond the end of the number - -- Added more to the manual - -Dec 29th, 2002 -v0.04 -- Fixed a memory leak in mp_to_unsigned_bin - -- optimized invmod code - -- Fixed bug in mp_div - -- use exchange instead of copy for results - -- added a bit more to the manual - -Dec 27th, 2002 -v0.03 -- Sped up s_mp_mul_high_digs by not computing the carries of the lower digits - -- Fixed a bug where mp_set_int wouldn't zero the value first and set the used member. - -- fixed a bug in s_mp_mul_high_digs where the limit placed on the result digits was not calculated properly - -- fixed bugs in add/sub/mul/sqr_mod functions where if the modulus and dest were the same it wouldn't work - -- fixed a bug in mp_mod and mp_mod_d concerning negative inputs - -- mp_mul_d didn't preserve sign - -- Many many many many fixes - -- Works in LibTomCrypt now :-) - -- Added iterations to the timing demos... more accurate. - -- Tom needs a job. - -Dec 26th, 2002 -v0.02 -- Fixed a few "slips" in the manual. This is "LibTomMath" afterall :-) - -- Added mp_cmp_mag, mp_neg, mp_abs and mp_radix_size that were missing. - -- Sped up the fast [comba] multipliers more [yahoo!] - -Dec 25th,2002 -v0.01 -- Initial release. Gimme a break. - -- Todo list, - add details to manual [e.g. algorithms] - more comments in code - example programs diff --git a/demo/demo.c b/demo/demo.c index 67984aa..3b7997d 100644 --- a/demo/demo.c +++ b/demo/demo.c @@ -68,8 +68,8 @@ int main(void) mp_init(&c); mp_init(&d); mp_init(&e); - mp_init(&f); - + mp_init(&f); + srand(time(NULL)); #if 0 @@ -89,7 +89,7 @@ int main(void) for (;;) { aa = abs(rand()) & MP_MASK; bb = abs(rand()) & MP_MASK; - if (MULT(aa,bb) != (aa*bb)) { + if (MULT(aa,bb) != (aa*bb)) { printf("%llu * %llu == %llu or %llu?\n", aa, bb, (ulong64)MULT(aa,bb), (ulong64)(aa*bb)); return 0; } @@ -111,18 +111,18 @@ int main(void) /* test mp_reduce_2k */ #if 0 - for (cnt = 3; cnt <= 4096; ++cnt) { + for (cnt = 3; cnt <= 256; ++cnt) { mp_digit tmp; mp_2expt(&a, cnt); mp_sub_d(&a, 1, &a); /* a = 2**cnt - 1 */ - - + + printf("\nTesting %4d bits", cnt); printf("(%d)", mp_reduce_is_2k(&a)); mp_reduce_2k_setup(&a, &tmp); printf("(%d)", tmp); - for (ix = 0; ix < 100000; ix++) { - if (!(ix & 1023)) {printf("."); fflush(stdout); } + for (ix = 0; ix < 10000; ix++) { + if (!(ix & 127)) {printf("."); fflush(stdout); } mp_rand(&b, (cnt/DIGIT_BIT + 1) * 2); mp_copy(&c, &b); mp_mod(&c, &a, &c); @@ -135,22 +135,23 @@ int main(void) } #endif - + /* test mp_div_3 */ #if 0 - for (cnt = 0; cnt < 1000000; ) { + for (cnt = 0; cnt < 10000; ) { mp_digit r1, r2; - + if (!(++cnt & 127)) printf("%9d\r", cnt); mp_rand(&a, abs(rand()) % 32 + 1); mp_div_d(&a, 3, &b, &r1); mp_div_3(&a, &c, &r2); - + if (mp_cmp(&b, &c) || r1 != r2) { - printf("Failure\n"); + printf("\n\nmp_div_3 => Failure\n"); } } -#endif + printf("\n\nPassed div_3 testing\n"); +#endif /* test the DR reduction */ #if 0 @@ -162,7 +163,7 @@ int main(void) a.dp[ix] = MP_MASK; } a.used = cnt; - mp_prime_next_prime(&a, 3); + mp_prime_next_prime(&a, 3, 0); mp_rand(&b, cnt - 1); mp_copy(&b, &c); @@ -178,9 +179,9 @@ int main(void) if (mp_cmp(&b, &c) != MP_EQ) { printf("Failed on trial %lu\n", rr); exit(-1); - + } - } while (++rr < 1000000); + } while (++rr < 10000); printf("Passed DR test for %d digits\n", cnt); } #endif @@ -369,7 +370,7 @@ int main(void) div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n = sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = add_d_n = sub_d_n= 0; - + /* force KARA and TOOM to enable despite cutoffs */ KARATSUBA_SQR_CUTOFF = KARATSUBA_MUL_CUTOFF = 110; TOOM_SQR_CUTOFF = TOOM_MUL_CUTOFF = 150; diff --git a/etc/2kprime.1 b/etc/2kprime.1 index c41ded1..e69de29 100644 --- a/etc/2kprime.1 +++ b/etc/2kprime.1 @@ -1,2 +0,0 @@ -256-bits (k = 36113) = 115792089237316195423570985008687907853269984665640564039457584007913129603823 -512-bits (k = 38117) = 13407807929942597099574024998205846127479365820592393377723561443721764030073546976801874298166903427690031858186486050853753882811946569946433649006045979 diff --git a/etc/2kprime.c b/etc/2kprime.c index 4f1d4bb..d48b83e 100644 --- a/etc/2kprime.c +++ b/etc/2kprime.c @@ -1,80 +1,80 @@ -/* Makes safe primes of a 2k nature */ -#include -#include - -int sizes[] = {256, 512, 768, 1024, 1536, 2048, 3072, 4096}; - -int main(void) -{ - char buf[2000]; - int x, y; - mp_int q, p; - FILE *out; - clock_t t1; - mp_digit z; - - mp_init_multi(&q, &p, NULL); - - out = fopen("2kprime.1", "w"); - for (x = 0; x < (int)(sizeof(sizes) / sizeof(sizes[0])); x++) { - top: - mp_2expt(&q, sizes[x]); - mp_add_d(&q, 3, &q); - z = -3; - - t1 = clock(); - for(;;) { - mp_sub_d(&q, 4, &q); - z += 4; - - if (z > MP_MASK) { - printf("No primes of size %d found\n", sizes[x]); - break; - } - - if (clock() - t1 > CLOCKS_PER_SEC) { - printf("."); fflush(stdout); -// sleep((clock() - t1 + CLOCKS_PER_SEC/2)/CLOCKS_PER_SEC); - t1 = clock(); - } - - /* quick test on q */ - mp_prime_is_prime(&q, 1, &y); - if (y == 0) { - continue; - } - - /* find (q-1)/2 */ - mp_sub_d(&q, 1, &p); - mp_div_2(&p, &p); - mp_prime_is_prime(&p, 3, &y); - if (y == 0) { - continue; - } - - /* test on q */ - mp_prime_is_prime(&q, 3, &y); - if (y == 0) { - continue; - } - - break; - } - - if (y == 0) { - ++sizes[x]; - goto top; - } - - mp_toradix(&q, buf, 10); - printf("\n\n%d-bits (k = %lu) = %s\n", sizes[x], z, buf); - fprintf(out, "%d-bits (k = %lu) = %s\n", sizes[x], z, buf); fflush(out); - } - - return 0; -} - - - - - +/* Makes safe primes of a 2k nature */ +#include +#include + +int sizes[] = {256, 512, 768, 1024, 1536, 2048, 3072, 4096}; + +int main(void) +{ + char buf[2000]; + int x, y; + mp_int q, p; + FILE *out; + clock_t t1; + mp_digit z; + + mp_init_multi(&q, &p, NULL); + + out = fopen("2kprime.1", "w"); + for (x = 0; x < (int)(sizeof(sizes) / sizeof(sizes[0])); x++) { + top: + mp_2expt(&q, sizes[x]); + mp_add_d(&q, 3, &q); + z = -3; + + t1 = clock(); + for(;;) { + mp_sub_d(&q, 4, &q); + z += 4; + + if (z > MP_MASK) { + printf("No primes of size %d found\n", sizes[x]); + break; + } + + if (clock() - t1 > CLOCKS_PER_SEC) { + printf("."); fflush(stdout); +// sleep((clock() - t1 + CLOCKS_PER_SEC/2)/CLOCKS_PER_SEC); + t1 = clock(); + } + + /* quick test on q */ + mp_prime_is_prime(&q, 1, &y); + if (y == 0) { + continue; + } + + /* find (q-1)/2 */ + mp_sub_d(&q, 1, &p); + mp_div_2(&p, &p); + mp_prime_is_prime(&p, 3, &y); + if (y == 0) { + continue; + } + + /* test on q */ + mp_prime_is_prime(&q, 3, &y); + if (y == 0) { + continue; + } + + break; + } + + if (y == 0) { + ++sizes[x]; + goto top; + } + + mp_toradix(&q, buf, 10); + printf("\n\n%d-bits (k = %lu) = %s\n", sizes[x], z, buf); + fprintf(out, "%d-bits (k = %lu) = %s\n", sizes[x], z, buf); fflush(out); + } + + return 0; +} + + + + + diff --git a/etc/drprime.c b/etc/drprime.c index 2b561e3..0ab8ea6 100644 --- a/etc/drprime.c +++ b/etc/drprime.c @@ -1,7 +1,7 @@ /* Makes safe primes of a DR nature */ #include -int sizes[] = { 256/DIGIT_BIT, 512/DIGIT_BIT, 768/DIGIT_BIT, 1024/DIGIT_BIT, 2048/DIGIT_BIT, 4096/DIGIT_BIT }; +int sizes[] = { 1+256/DIGIT_BIT, 1+512/DIGIT_BIT, 1+768/DIGIT_BIT, 1+1024/DIGIT_BIT, 1+2048/DIGIT_BIT, 1+4096/DIGIT_BIT }; int main(void) { int res, x, y; @@ -27,6 +27,7 @@ int main(void) a.used = sizes[x]; /* now loop */ + res = 0; for (;;) { a.dp[0] += 4; if (a.dp[0] >= MP_MASK) break; diff --git a/etc/drprimes.txt b/etc/drprimes.txt index 717420d..5022e80 100644 --- a/etc/drprimes.txt +++ b/etc/drprimes.txt @@ -1,15 +1,3 @@ -300-bit prime: -p == 2037035976334486086268445688409378161051468393665936250636140449354381299763336706183393387 - -510-bit prime: -p == 3351951982485649274893506249551461531869841455148098344430890360930441007518386744200468574541725856922507964546621512713438470702986642486608412251494847 - -765-bit prime: -p == 194064761537588616893622436057812819407110752139587076392381504753256369085797110791359801103580809743810966337141384150771447505514351798930535909380147642400556872002606238193783160703949805603157874899214558593861605856727005843 - -1740-bit prime: -p == 61971563797913992479098926650774597592238975917324828616370329001490282756182680310375299496755876376552390992409906202402580445340335946188208371182877207703039791403230793200138374588682414508868522097839706723444887189794752005280474068640895359332705297533442094790319040758184131464298255306336601284054032615054089107503261218395204931877449590906016855549287497608058070532126514935495184332288660623518513755499687752752528983258754107553298994358814410594621086881204713587661301862918471291451469190214535690028223 - -2145-bit prime: -p == 5120834017984591518147028606005386392991070803233539296225079797126347381640561714282620018633786528684625023495254338414266418034876748837569635527008462887513799703364491256252208677097644781218029521545625387720450034199300257983090290650191518075514440227307582827991892955933645635564359934476985058395497772801264225688705417270604479898255105628816161764712152286804906915652283101897505006786990112535065979412882966109410722156057838063961993103028819293481078313367826492291911499907219457764211473530756735049840415233164976184864760203928986194694093688479274544786530457604655777313274555786861719645260099496565700321073395329400403 - +280-bit prime: +p == 1942668892225729070919461906823518906642406839052139521251812409738904285204940164839 + diff --git a/makefile b/makefile index afc1000..e40b637 100644 --- a/makefile +++ b/makefile @@ -6,7 +6,7 @@ CFLAGS += -I./ -Wall -W -Wshadow -O3 -funroll-loops #x86 optimizations [should be valid for any GCC install though] CFLAGS += -fomit-frame-pointer -VERSION=0.26 +VERSION=0.27 default: libtommath.a @@ -95,7 +95,7 @@ manual: rm -f bn.aux bn.dvi bn.log clean: - rm -f *.pdf *.o *.a *.obj *.lib *.exe etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \ + rm -f *.bat *.pdf *.o *.a *.obj *.lib *.exe etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \ tommath.idx tommath.toc tommath.log tommath.aux tommath.dvi tommath.lof tommath.ind tommath.ilg *.ps *.pdf *.log *.s mpi.c \ poster.aux poster.dvi poster.log cd etc ; make clean diff --git a/poster.pdf b/poster.pdf index d12d949..237e88e 100644 Binary files a/poster.pdf and b/poster.pdf differ diff --git a/pre_gen/mpi.c b/pre_gen/mpi.c index 72cc770..e146f4c 100644 --- a/pre_gen/mpi.c +++ b/pre_gen/mpi.c @@ -942,9 +942,6 @@ mp_add_d (mp_int * a, mp_digit b, mp_int * c) /* if a is positive */ if (a->sign == MP_ZPOS) { - /* setup size */ - c->used = a->used + 1; - /* add digit, after this we're propagating * the carry. */ @@ -961,6 +958,9 @@ mp_add_d (mp_int * a, mp_digit b, mp_int * c) /* set final carry */ ix++; *tmpc++ = mu; + + /* setup size */ + c->used = a->used + 1; } else { /* a was negative and |a| < b */ c->used = 1; @@ -2121,7 +2121,7 @@ int mp_dr_is_modulus(mp_int *a) * * Has been modified to use algorithm 7.10 from the LTM book instead * - * Input x must be in the range 0 <= x <= (n-1)^2 + * Input x must be in the range 0 <= x <= (n-1)**2 */ int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k) @@ -2129,10 +2129,10 @@ mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k) int err, i, m; mp_word r; mp_digit mu, *tmpx1, *tmpx2; - + /* m = digits in modulus */ m = n->used; - + /* ensure that "x" has at least 2m digits */ if (x->alloc < m + m) { if ((err = mp_grow (x, m + m)) != MP_OKAY) { @@ -2140,20 +2140,20 @@ mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k) } } -/* top of loop, this is where the code resumes if +/* top of loop, this is where the code resumes if * another reduction pass is required. */ top: /* aliases for digits */ /* alias for lower half of x */ tmpx1 = x->dp; - + /* alias for upper half of x, or x/B**m */ tmpx2 = x->dp + m; - + /* set carry to zero */ mu = 0; - + /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ for (i = 0; i < m; i++) { r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu; @@ -2172,7 +2172,7 @@ top: /* clamp, sub and return */ mp_clamp (x); - /* if x >= n then subtract and reduce again + /* if x >= n then subtract and reduce again * Each successive "recursion" makes the input smaller and smaller. */ if (mp_cmp_mag (x, n) != MP_LT) { @@ -2402,7 +2402,7 @@ mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) */ #include -/* computes Y == G^X mod P, HAC pp.616, Algorithm 14.85 +/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 * * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. * The value of k changes based on the size of the exponent. @@ -2422,10 +2422,10 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) mp_int M[TAB_SIZE], res; mp_digit buf, mp; int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; - + /* use a pointer to the reduction algorithm. This allows us to use * one of many reduction algorithms without modding the guts of - * the code with if statements everywhere. + * the code with if statements everywhere. */ int (*redux)(mp_int*,mp_int*,mp_digit); @@ -2456,7 +2456,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) /* init M array */ /* init first cell */ if ((err = mp_init(&M[1])) != MP_OKAY) { - return err; + return err; } /* now init the second half of the array */ @@ -2476,7 +2476,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) { goto __M; } - + /* automatically pick the comba one if available (saves quite a few calls/ifs) */ if (((P->used * 2 + 1) < MP_WARRAY) && P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { @@ -2926,17 +2926,29 @@ int mp_grow (mp_int * a, int size) { int i; + mp_digit *tmp; + /* if the alloc size is smaller alloc more ram */ if (a->alloc < size) { /* ensure there are always at least MP_PREC digits extra on top */ - size += (MP_PREC * 2) - (size % MP_PREC); + size += (MP_PREC * 2) - (size % MP_PREC); - a->dp = OPT_CAST realloc (a->dp, sizeof (mp_digit) * size); - if (a->dp == NULL) { + /* reallocate the array a->dp + * + * We store the return in a temporary variable + * in case the operation failed we don't want + * to overwrite the dp member of a. + */ + tmp = OPT_CAST realloc (a->dp, sizeof (mp_digit) * size); + if (tmp == NULL) { + /* reallocation failed but "a" is still valid [can be freed] */ return MP_MEM; } + /* reallocation succeeded so set a->dp */ + a->dp = tmp; + /* zero excess digits */ i = a->alloc; a->alloc = size; @@ -3874,7 +3886,7 @@ mp_mod (mp_int * a, mp_int * b, mp_int * c) */ #include -/* calc a value mod 2^b */ +/* calc a value mod 2**b */ int mp_mod_2d (mp_int * a, int b, mp_int * c) { @@ -4405,12 +4417,13 @@ mp_mul_2d (mp_int * a, int b, mp_int * c) int mp_mul_d (mp_int * a, mp_digit b, mp_int * c) { - int res, pa, olduse; + mp_digit u, *tmpa, *tmpc; + mp_word r; + int ix, res, olduse; /* make sure c is big enough to hold a*b */ - pa = a->used; - if (c->alloc < pa + 1) { - if ((res = mp_grow (c, pa + 1)) != MP_OKAY) { + if (c->alloc < a->used + 1) { + if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) { return res; } } @@ -4418,43 +4431,42 @@ mp_mul_d (mp_int * a, mp_digit b, mp_int * c) /* get the original destinations used count */ olduse = c->used; - /* set the new temporary used count */ - c->used = pa + 1; + /* set the sign */ c->sign = a->sign; - { - register mp_digit u, *tmpa, *tmpc; - register mp_word r; - register int ix; + /* alias for a->dp [source] */ + tmpa = a->dp; - /* alias for a->dp [source] */ - tmpa = a->dp; + /* alias for c->dp [dest] */ + tmpc = c->dp; - /* alias for c->dp [dest] */ - tmpc = c->dp; + /* zero carry */ + u = 0; - /* zero carry */ - u = 0; - for (ix = 0; ix < pa; ix++) { - /* compute product and carry sum for this term */ - r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b); + /* compute columns */ + for (ix = 0; ix < a->used; ix++) { + /* compute product and carry sum for this term */ + r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b); - /* mask off higher bits to get a single digit */ - *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); + /* mask off higher bits to get a single digit */ + *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); - /* send carry into next iteration */ - u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); - } - /* store final carry [if any] */ - *tmpc++ = u; - - /* now zero digits above the top */ - for (; pa < olduse; pa++) { - *tmpc++ = 0; - } + /* send carry into next iteration */ + u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); } - mp_clamp (c); + /* store final carry [if any] */ + *tmpc++ = u; + + /* now zero digits above the top */ + while (ix++ < olduse) { + *tmpc++ = 0; + } + + /* set used count */ + c->used = a->used + 1; + mp_clamp(c); + return MP_OKAY; } @@ -6172,7 +6184,8 @@ mp_sub_d (mp_int * a, mp_digit b, mp_int * c) } } - for (; ix < oldused; ix++) { + /* zero excess digits */ + while (ix++ < oldused) { *tmpc++ = 0; } mp_clamp(c); @@ -6596,12 +6609,12 @@ ERR: #include /* squaring using Toom-Cook 3-way algorithm */ -int +int mp_toom_sqr(mp_int *a, mp_int *b) { mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2; int res, B; - + /* init temps */ if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) { return res; @@ -6609,8 +6622,8 @@ mp_toom_sqr(mp_int *a, mp_int *b) /* B */ B = a->used / 3; - - /* a = a2 * B^2 + a1 * B + a0 */ + + /* a = a2 * B**2 + a1 * B + a0 */ if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { goto ERR; } @@ -6625,17 +6638,17 @@ mp_toom_sqr(mp_int *a, mp_int *b) goto ERR; } mp_rshd(&a2, B*2); - + /* w0 = a0*a0 */ if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) { goto ERR; } - + /* w4 = a2 * a2 */ if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) { goto ERR; } - + /* w1 = (a2 + 2(a1 + 2a0))**2 */ if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { goto ERR; @@ -6649,11 +6662,11 @@ mp_toom_sqr(mp_int *a, mp_int *b) if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { goto ERR; } - + if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) { goto ERR; } - + /* w3 = (a0 + 2(a1 + 2a2))**2 */ if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { goto ERR; @@ -6667,11 +6680,11 @@ mp_toom_sqr(mp_int *a, mp_int *b) if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { goto ERR; } - + if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) { goto ERR; } - + /* w2 = (a2 + a1 + a0)**2 */ if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { @@ -6683,18 +6696,18 @@ mp_toom_sqr(mp_int *a, mp_int *b) if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) { goto ERR; } - - /* now solve the matrix - + + /* now solve the matrix + 0 0 0 0 1 1 2 4 8 16 1 1 1 1 1 16 8 4 2 1 1 0 0 0 0 - + using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication. */ - + /* r1 - r4 */ if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { goto ERR; @@ -6766,7 +6779,7 @@ mp_toom_sqr(mp_int *a, mp_int *b) if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { goto ERR; } - + /* at this point shift W[n] by B*n */ if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { goto ERR; @@ -6779,8 +6792,8 @@ mp_toom_sqr(mp_int *a, mp_int *b) } if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { goto ERR; - } - + } + if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) { goto ERR; } @@ -6792,13 +6805,13 @@ mp_toom_sqr(mp_int *a, mp_int *b) } if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) { goto ERR; - } - + } + ERR: mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL); return res; -} - +} + /* End: bn_mp_toom_sqr.c */