added libtommath-0.31

This commit is contained in:
Tom St Denis 2004-08-09 22:15:59 +00:00 committed by Steffen Jaeckel
parent 350578d400
commit 8eaa98807b
75 changed files with 5111 additions and 5218 deletions

BIN
bn.pdf

Binary file not shown.

2
bn.tex
View File

@ -49,7 +49,7 @@
\begin{document} \begin{document}
\frontmatter \frontmatter
\pagestyle{empty} \pagestyle{empty}
\title{LibTomMath User Manual \\ v0.30} \title{LibTomMath User Manual \\ v0.31}
\author{Tom St Denis \\ tomstdenis@iahu.ca} \author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle \maketitle
This text, the library and the accompanying textbook are all hereby placed in the public domain. This book has been This text, the library and the accompanying textbook are all hereby placed in the public domain. This book has been

View File

@ -88,7 +88,7 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
} }
/* setup dest */ /* setup dest */
olduse = c->used; olduse = c->used;
c->used = digs; c->used = digs;
{ {

View File

@ -36,7 +36,7 @@ mp_2expt (mp_int * a, int b)
a->used = b / DIGIT_BIT + 1; a->used = b / DIGIT_BIT + 1;
/* put the single bit in its place */ /* put the single bit in its place */
a->dp[b / DIGIT_BIT] = 1 << (b % DIGIT_BIT); a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
return MP_OKAY; return MP_OKAY;
} }

View File

@ -18,10 +18,14 @@
void void
mp_clear (mp_int * a) mp_clear (mp_int * a)
{ {
int i;
/* only do anything if a hasn't been freed previously */ /* only do anything if a hasn't been freed previously */
if (a->dp != NULL) { if (a->dp != NULL) {
/* first zero the digits */ /* first zero the digits */
memset (a->dp, 0, sizeof (mp_digit) * a->used); for (i = 0; i < a->used; i++) {
a->dp[i] = 0;
}
/* free ram */ /* free ram */
XFREE(a->dp); XFREE(a->dp);

View File

@ -187,7 +187,7 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
*/ */
/* get sign before writing to c */ /* get sign before writing to c */
x.sign = a->sign; x.sign = x.used == 0 ? MP_ZPOS : a->sign;
if (c != NULL) { if (c != NULL) {
mp_clamp (&q); mp_clamp (&q);

View File

@ -14,15 +14,22 @@
*/ */
#include <tommath.h> #include <tommath.h>
/* init a new bigint */ /* init a new mp_int */
int mp_init (mp_int * a) int mp_init (mp_int * a)
{ {
int i;
/* allocate memory required and clear it */ /* allocate memory required and clear it */
a->dp = OPT_CAST(mp_digit) XCALLOC (sizeof (mp_digit), MP_PREC); a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
if (a->dp == NULL) { if (a->dp == NULL) {
return MP_MEM; return MP_MEM;
} }
/* set the digits to zero */
for (i = 0; i < MP_PREC; i++) {
a->dp[i] = 0;
}
/* set the used to zero, allocated digits to the default precision /* set the used to zero, allocated digits to the default precision
* and sign to positive */ * and sign to positive */
a->used = 0; a->used = 0;

View File

@ -76,9 +76,6 @@ int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
goto X0Y0; goto X0Y0;
/* now shift the digits */ /* now shift the digits */
x0.sign = x1.sign = a->sign;
y0.sign = y1.sign = b->sign;
x0.used = y0.used = B; x0.used = y0.used = B;
x1.used = a->used - B; x1.used = a->used - B;
y1.used = b->used - B; y1.used = b->used - B;

View File

@ -43,6 +43,6 @@ int mp_mul (mp_int * a, mp_int * b, mp_int * c)
res = s_mp_mul (a, b, c); res = s_mp_mul (a, b, c);
} }
} }
c->sign = neg; c->sign = (c->used > 0) ? neg : MP_ZPOS;
return res; return res;
} }

View File

@ -17,7 +17,8 @@
/* determines if mp_reduce_2k can be used */ /* determines if mp_reduce_2k can be used */
int mp_reduce_is_2k(mp_int *a) int mp_reduce_is_2k(mp_int *a)
{ {
int ix, iy, iz, iw; int ix, iy, iw;
mp_digit iz;
if (a->used == 0) { if (a->used == 0) {
return 0; return 0;
@ -34,7 +35,7 @@ int mp_reduce_is_2k(mp_int *a)
return 0; return 0;
} }
iz <<= 1; iz <<= 1;
if (iz > (int)MP_MASK) { if (iz > (mp_digit)MP_MASK) {
++iw; ++iw;
iz = 1; iz = 1;
} }

View File

@ -18,14 +18,16 @@
CPU /Compiler /MUL CUTOFF/SQR CUTOFF CPU /Compiler /MUL CUTOFF/SQR CUTOFF
------------------------------------------------------------- -------------------------------------------------------------
Intel P4 /GCC v3.2 / 70/ 108 Intel P4 Northwood /GCC v3.3.3 / 59/ 81/profiled build
AMD Athlon XP /GCC v3.2 / 109/ 127 Intel P4 Northwood /GCC v3.3.3 / 59/ 80/profiled_single build
Intel P4 Northwood /ICC v8.0 / 57/ 70/profiled build
Intel P4 Northwood /ICC v8.0 / 54/ 76/profiled_single build
AMD Athlon XP /GCC v3.2 / 109/ 127/
*/ */
/* configured for a AMD XP Thoroughbred core with etc/tune.c */ int KARATSUBA_MUL_CUTOFF = 57, /* Min. number of digits before Karatsuba multiplication is used. */
int KARATSUBA_MUL_CUTOFF = 109, /* Min. number of digits before Karatsuba multiplication is used. */ KARATSUBA_SQR_CUTOFF = 70, /* Min. number of digits before Karatsuba squaring is used. */
KARATSUBA_SQR_CUTOFF = 127, /* Min. number of digits before Karatsuba squaring is used. */
TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */ TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */
TOOM_SQR_CUTOFF = 400; TOOM_SQR_CUTOFF = 400;

View File

@ -84,6 +84,7 @@ while (<IN>) {
$text[$line++] = $_; $text[$line++] = $_;
last if ($_ =~ /tommath\.h/); last if ($_ =~ /tommath\.h/);
} }
<SRC>;
} }
$inline = 0; $inline = 0;

View File

@ -1,3 +1,12 @@
August 9th, 2004
v0.31 -- "profiled" builds now :-) new timings for Intel Northwoods
-- Added "pretty" build target
-- Updated mp_init() to actually assign 0's instead of relying on calloc()
-- "Wolfgang Ehrhardt" <Wolfgang.Ehrhardt@munich.netsurf.de> found a bug in mp_mul() where if
you multiply a negative by zero you get negative zero as the result. Oops.
-- J Harper from PeerSec let me toy with his AMD64 and I got 60-bit digits working properly
[this also means that I fixed a bug where the code misbehaved if sizeof(int) < sizeof(mp_digit)]
April 11th, 2004 April 11th, 2004
v0.30 -- Added "mp_toradix_n" which stores upto "n-1" least significant digits of an mp_int v0.30 -- Added "mp_toradix_n" which stores upto "n-1" least significant digits of an mp_int
-- Johan Lindh sent a patch so MSVC wouldn't whine about redefining malloc [in weird dll modes] -- Johan Lindh sent a patch so MSVC wouldn't whine about redefining malloc [in weird dll modes]

View File

@ -1,7 +1,5 @@
#include <time.h> #include <time.h>
#define TESTING
#ifdef IOWNANATHLON #ifdef IOWNANATHLON
#include <unistd.h> #include <unistd.h>
#define SLEEP sleep(4) #define SLEEP sleep(4)
@ -11,49 +9,6 @@
#include "tommath.h" #include "tommath.h"
#ifdef TIMER
ulong64 _tt;
#if defined(__i386__) || defined(_M_IX86) || defined(_M_AMD64)
/* RDTSC from Scott Duplichan */
static ulong64 TIMFUNC (void)
{
#if defined __GNUC__
#ifdef __i386__
ulong64 a;
__asm__ __volatile__ ("rdtsc ":"=A" (a));
return a;
#else /* gcc-IA64 version */
unsigned long result;
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
while (__builtin_expect ((int) result == -1, 0))
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
return result;
#endif
// Microsoft and Intel Windows compilers
#elif defined _M_IX86
__asm rdtsc
#elif defined _M_AMD64
return __rdtsc ();
#elif defined _M_IA64
#if defined __INTEL_COMPILER
#include <ia64intrin.h>
#endif
return __getReg (3116);
#else
#error need rdtsc function for this build
#endif
}
#else
#define TIMFUNC clock
#endif
ulong64 rdtsc(void) { return TIMFUNC() - _tt; }
void reset(void) { _tt = TIMFUNC(); }
#endif
void ndraw(mp_int *a, char *name) void ndraw(mp_int *a, char *name)
{ {
char buf[4096]; char buf[4096];
@ -89,10 +44,6 @@ int myrng(unsigned char *dst, int len, void *dat)
} }
#define DO2(x) x; x;
#define DO4(x) DO2(x); DO2(x);
#define DO8(x) DO4(x); DO4(x);
#define DO(x) DO8(x); DO8(x);
char cmd[4096], buf[4096]; char cmd[4096], buf[4096];
int main(void) int main(void)
@ -103,10 +54,6 @@ int main(void)
unsigned rr; unsigned rr;
int i, n, err, cnt, ix, old_kara_m, old_kara_s; int i, n, err, cnt, ix, old_kara_m, old_kara_s;
#ifdef TIMER
ulong64 tt, CLK_PER_SEC;
FILE *log, *logb, *logc;
#endif
mp_init(&a); mp_init(&a);
mp_init(&b); mp_init(&b);
@ -117,11 +64,10 @@ int main(void)
srand(time(NULL)); srand(time(NULL));
#ifdef TESTING
// test mp_get_int // test mp_get_int
printf("Testing: mp_get_int\n"); printf("Testing: mp_get_int\n");
for(i=0;i<1000;++i) { for(i=0;i<1000;++i) {
t = (unsigned long)rand()*rand()+1; t = ((unsigned long)rand()*rand()+1)&0xFFFFFFFF;
mp_set_int(&a,t); mp_set_int(&a,t);
if (t!=mp_get_int(&a)) { if (t!=mp_get_int(&a)) {
printf("mp_get_int() bad result!\n"); printf("mp_get_int() bad result!\n");
@ -141,7 +87,7 @@ int main(void)
// test mp_sqrt // test mp_sqrt
printf("Testing: mp_sqrt\n"); printf("Testing: mp_sqrt\n");
for (i=0;i<10000;++i) { for (i=0;i<1000;++i) {
printf("%6d\r", i); fflush(stdout); printf("%6d\r", i); fflush(stdout);
n = (rand()&15)+1; n = (rand()&15)+1;
mp_rand(&a,n); mp_rand(&a,n);
@ -157,7 +103,7 @@ int main(void)
} }
printf("\nTesting: mp_is_square\n"); printf("\nTesting: mp_is_square\n");
for (i=0;i<100000;++i) { for (i=0;i<1000;++i) {
printf("%6d\r", i); fflush(stdout); printf("%6d\r", i); fflush(stdout);
/* test mp_is_square false negatives */ /* test mp_is_square false negatives */
@ -186,11 +132,9 @@ int main(void)
} }
printf("\n\n"); printf("\n\n");
#endif
#ifdef TESTING
/* test for size */ /* test for size */
for (ix = 16; ix < 512; ix++) { for (ix = 10; ix < 256; ix++) {
printf("Testing (not safe-prime): %9d bits \r", ix); fflush(stdout); printf("Testing (not safe-prime): %9d bits \r", ix); fflush(stdout);
err = mp_prime_random_ex(&a, 8, ix, (rand()&1)?LTM_PRIME_2MSB_OFF:LTM_PRIME_2MSB_ON, myrng, NULL); err = mp_prime_random_ex(&a, 8, ix, (rand()&1)?LTM_PRIME_2MSB_OFF:LTM_PRIME_2MSB_ON, myrng, NULL);
if (err != MP_OKAY) { if (err != MP_OKAY) {
@ -203,7 +147,7 @@ int main(void)
} }
} }
for (ix = 16; ix < 512; ix++) { for (ix = 16; ix < 256; ix++) {
printf("Testing ( safe-prime): %9d bits \r", ix); fflush(stdout); printf("Testing ( safe-prime): %9d bits \r", ix); fflush(stdout);
err = mp_prime_random_ex(&a, 8, ix, ((rand()&1)?LTM_PRIME_2MSB_OFF:LTM_PRIME_2MSB_ON)|LTM_PRIME_SAFE, myrng, NULL); err = mp_prime_random_ex(&a, 8, ix, ((rand()&1)?LTM_PRIME_2MSB_OFF:LTM_PRIME_2MSB_ON)|LTM_PRIME_SAFE, myrng, NULL);
if (err != MP_OKAY) { if (err != MP_OKAY) {
@ -225,9 +169,7 @@ int main(void)
} }
printf("\n\n"); printf("\n\n");
#endif
#ifdef TESTING
mp_read_radix(&a, "123456", 10); mp_read_radix(&a, "123456", 10);
mp_toradix_n(&a, buf, 10, 3); mp_toradix_n(&a, buf, 10, 3);
printf("a == %s\n", buf); printf("a == %s\n", buf);
@ -235,7 +177,6 @@ int main(void)
printf("a == %s\n", buf); printf("a == %s\n", buf);
mp_toradix_n(&a, buf, 10, 30); mp_toradix_n(&a, buf, 10, 30);
printf("a == %s\n", buf); printf("a == %s\n", buf);
#endif
#if 0 #if 0
@ -248,22 +189,6 @@ int main(void)
} }
#endif #endif
#if 0
{
mp_word aa, bb;
for (;;) {
aa = abs(rand()) & MP_MASK;
bb = abs(rand()) & MP_MASK;
if (MULT(aa,bb) != (aa*bb)) {
printf("%llu * %llu == %llu or %llu?\n", aa, bb, (ulong64)MULT(aa,bb), (ulong64)(aa*bb));
return 0;
}
}
}
#endif
#ifdef TESTING
/* test mp_cnt_lsb */ /* test mp_cnt_lsb */
printf("testing mp_cnt_lsb...\n"); printf("testing mp_cnt_lsb...\n");
mp_set(&a, 1); mp_set(&a, 1);
@ -274,12 +199,10 @@ int main(void)
} }
mp_mul_2(&a, &a); mp_mul_2(&a, &a);
} }
#endif
/* test mp_reduce_2k */ /* test mp_reduce_2k */
#ifdef TESTING
printf("Testing mp_reduce_2k...\n"); printf("Testing mp_reduce_2k...\n");
for (cnt = 3; cnt <= 384; ++cnt) { for (cnt = 3; cnt <= 128; ++cnt) {
mp_digit tmp; mp_digit tmp;
mp_2expt(&a, cnt); mp_2expt(&a, cnt);
mp_sub_d(&a, 2, &a); /* a = 2**cnt - 2 */ mp_sub_d(&a, 2, &a); /* a = 2**cnt - 2 */
@ -289,7 +212,7 @@ int main(void)
printf("(%d)", mp_reduce_is_2k(&a)); printf("(%d)", mp_reduce_is_2k(&a));
mp_reduce_2k_setup(&a, &tmp); mp_reduce_2k_setup(&a, &tmp);
printf("(%d)", tmp); printf("(%d)", tmp);
for (ix = 0; ix < 10000; ix++) { for (ix = 0; ix < 1000; ix++) {
if (!(ix & 127)) {printf("."); fflush(stdout); } if (!(ix & 127)) {printf("."); fflush(stdout); }
mp_rand(&b, (cnt/DIGIT_BIT + 1) * 2); mp_rand(&b, (cnt/DIGIT_BIT + 1) * 2);
mp_copy(&c, &b); mp_copy(&c, &b);
@ -301,14 +224,11 @@ int main(void)
} }
} }
} }
#endif
/* test mp_div_3 */ /* test mp_div_3 */
#ifdef TESTING
printf("Testing mp_div_3...\n"); printf("Testing mp_div_3...\n");
mp_set(&d, 3); mp_set(&d, 3);
for (cnt = 0; cnt < 1000000; ) { for (cnt = 0; cnt < 10000; ) {
mp_digit r1, r2; mp_digit r1, r2;
if (!(++cnt & 127)) printf("%9d\r", cnt); if (!(++cnt & 127)) printf("%9d\r", cnt);
@ -321,12 +241,10 @@ int main(void)
} }
} }
printf("\n\nPassed div_3 testing\n"); printf("\n\nPassed div_3 testing\n");
#endif
/* test the DR reduction */ /* test the DR reduction */
#ifdef TESTING
printf("testing mp_dr_reduce...\n"); printf("testing mp_dr_reduce...\n");
for (cnt = 2; cnt < 128; cnt++) { for (cnt = 2; cnt < 32; cnt++) {
printf("%d digit modulus\n", cnt); printf("%d digit modulus\n", cnt);
mp_grow(&a, cnt); mp_grow(&a, cnt);
mp_zero(&a); mp_zero(&a);
@ -334,7 +252,7 @@ int main(void)
a.dp[ix] = MP_MASK; a.dp[ix] = MP_MASK;
} }
a.used = cnt; a.used = cnt;
mp_prime_next_prime(&a, 3, 0); a.dp[0] = 3;
mp_rand(&b, cnt - 1); mp_rand(&b, cnt - 1);
mp_copy(&b, &c); mp_copy(&b, &c);
@ -346,206 +264,16 @@ int main(void)
mp_copy(&b, &c); mp_copy(&b, &c);
mp_mod(&b, &a, &b); mp_mod(&b, &a, &b);
mp_dr_reduce(&c, &a, (1<<DIGIT_BIT)-a.dp[0]); mp_dr_reduce(&c, &a, (((mp_digit)1)<<DIGIT_BIT)-a.dp[0]);
if (mp_cmp(&b, &c) != MP_EQ) { if (mp_cmp(&b, &c) != MP_EQ) {
printf("Failed on trial %lu\n", rr); exit(-1); printf("Failed on trial %lu\n", rr); exit(-1);
} }
} while (++rr < 100000); } while (++rr < 500);
printf("Passed DR test for %d digits\n", cnt); printf("Passed DR test for %d digits\n", cnt);
} }
#endif
#ifdef TIMER
/* temp. turn off TOOM */
TOOM_MUL_CUTOFF = TOOM_SQR_CUTOFF = 100000;
reset();
sleep(1);
CLK_PER_SEC = rdtsc();
printf("CLK_PER_SEC == %lu\n", CLK_PER_SEC);
log = fopen("logs/add.log", "w");
for (cnt = 8; cnt <= 128; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
reset();
rr = 0;
do {
DO(mp_add(&a,&b,&c));
rr += 16;
} while (rdtsc() < (CLK_PER_SEC * 2));
tt = rdtsc();
printf("Adding\t\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log);
}
fclose(log);
log = fopen("logs/sub.log", "w");
for (cnt = 8; cnt <= 128; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
reset();
rr = 0;
do {
DO(mp_sub(&a,&b,&c));
rr += 16;
} while (rdtsc() < (CLK_PER_SEC * 2));
tt = rdtsc();
printf("Subtracting\t\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log);
}
fclose(log);
/* do mult/square twice, first without karatsuba and second with */
mult_test:
old_kara_m = KARATSUBA_MUL_CUTOFF;
old_kara_s = KARATSUBA_SQR_CUTOFF;
for (ix = 0; ix < 2; ix++) {
printf("With%s Karatsuba\n", (ix==0)?"out":"");
KARATSUBA_MUL_CUTOFF = (ix==0)?9999:old_kara_m;
KARATSUBA_SQR_CUTOFF = (ix==0)?9999:old_kara_s;
log = fopen((ix==0)?"logs/mult.log":"logs/mult_kara.log", "w");
for (cnt = 32; cnt <= 288; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
reset();
rr = 0;
do {
DO(mp_mul(&a, &b, &c));
rr += 16;
} while (rdtsc() < (CLK_PER_SEC * 2));
tt = rdtsc();
printf("Multiplying\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
fprintf(log, "%d %9llu\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log);
}
fclose(log);
log = fopen((ix==0)?"logs/sqr.log":"logs/sqr_kara.log", "w");
for (cnt = 32; cnt <= 288; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
reset();
rr = 0;
do {
DO(mp_sqr(&a, &b));
rr += 16;
} while (rdtsc() < (CLK_PER_SEC * 2));
tt = rdtsc();
printf("Squaring\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
fprintf(log, "%d %9llu\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log);
}
fclose(log);
}
expt_test:
{
char *primes[] = {
/* 2K moduli mersenne primes */
"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151",
"531137992816767098689588206552468627329593117727031923199444138200403559860852242739162502265229285668889329486246501015346579337652707239409519978766587351943831270835393219031728127",
"10407932194664399081925240327364085538615262247266704805319112350403608059673360298012239441732324184842421613954281007791383566248323464908139906605677320762924129509389220345773183349661583550472959420547689811211693677147548478866962501384438260291732348885311160828538416585028255604666224831890918801847068222203140521026698435488732958028878050869736186900714720710555703168729087",
"1475979915214180235084898622737381736312066145333169775147771216478570297878078949377407337049389289382748507531496480477281264838760259191814463365330269540496961201113430156902396093989090226259326935025281409614983499388222831448598601834318536230923772641390209490231836446899608210795482963763094236630945410832793769905399982457186322944729636418890623372171723742105636440368218459649632948538696905872650486914434637457507280441823676813517852099348660847172579408422316678097670224011990280170474894487426924742108823536808485072502240519452587542875349976558572670229633962575212637477897785501552646522609988869914013540483809865681250419497686697771007",
"259117086013202627776246767922441530941818887553125427303974923161874019266586362086201209516800483406550695241733194177441689509238807017410377709597512042313066624082916353517952311186154862265604547691127595848775610568757931191017711408826252153849035830401185072116424747461823031471398340229288074545677907941037288235820705892351068433882986888616658650280927692080339605869308790500409503709875902119018371991620994002568935113136548829739112656797303241986517250116412703509705427773477972349821676443446668383119322540099648994051790241624056519054483690809616061625743042361721863339415852426431208737266591962061753535748892894599629195183082621860853400937932839420261866586142503251450773096274235376822938649407127700846077124211823080804139298087057504713825264571448379371125032081826126566649084251699453951887789613650248405739378594599444335231188280123660406262468609212150349937584782292237144339628858485938215738821232393687046160677362909315071",
"190797007524439073807468042969529173669356994749940177394741882673528979787005053706368049835514900244303495954950709725762186311224148828811920216904542206960744666169364221195289538436845390250168663932838805192055137154390912666527533007309292687539092257043362517857366624699975402375462954490293259233303137330643531556539739921926201438606439020075174723029056838272505051571967594608350063404495977660656269020823960825567012344189908927956646011998057988548630107637380993519826582389781888135705408653045219655801758081251164080554609057468028203308718724654081055323215860189611391296030471108443146745671967766308925858547271507311563765171008318248647110097614890313562856541784154881743146033909602737947385055355960331855614540900081456378659068370317267696980001187750995491090350108417050917991562167972281070161305972518044872048331306383715094854938415738549894606070722584737978176686422134354526989443028353644037187375385397838259511833166416134323695660367676897722287918773420968982326089026150031515424165462111337527431154890666327374921446276833564519776797633875503548665093914556482031482248883127023777039667707976559857333357013727342079099064400455741830654320379350833236245819348824064783585692924881021978332974949906122664421376034687815350484991",
/* DR moduli */
"14059105607947488696282932836518693308967803494693489478439861164411992439598399594747002144074658928593502845729752797260025831423419686528151609940203368612079",
"101745825697019260773923519755878567461315282017759829107608914364075275235254395622580447400994175578963163918967182013639660669771108475957692810857098847138903161308502419410142185759152435680068435915159402496058513611411688900243039",
"736335108039604595805923406147184530889923370574768772191969612422073040099331944991573923112581267542507986451953227192970402893063850485730703075899286013451337291468249027691733891486704001513279827771740183629161065194874727962517148100775228363421083691764065477590823919364012917984605619526140821797602431",
"38564998830736521417281865696453025806593491967131023221754800625044118265468851210705360385717536794615180260494208076605798671660719333199513807806252394423283413430106003596332513246682903994829528690198205120921557533726473585751382193953592127439965050261476810842071573684505878854588706623484573925925903505747545471088867712185004135201289273405614415899438276535626346098904241020877974002916168099951885406379295536200413493190419727789712076165162175783",
"542189391331696172661670440619180536749994166415993334151601745392193484590296600979602378676624808129613777993466242203025054573692562689251250471628358318743978285860720148446448885701001277560572526947619392551574490839286458454994488665744991822837769918095117129546414124448777033941223565831420390846864429504774477949153794689948747680362212954278693335653935890352619041936727463717926744868338358149568368643403037768649616778526013610493696186055899318268339432671541328195724261329606699831016666359440874843103020666106568222401047720269951530296879490444224546654729111504346660859907296364097126834834235287147",
"1487259134814709264092032648525971038895865645148901180585340454985524155135260217788758027400478312256339496385275012465661575576202252063145698732079880294664220579764848767704076761853197216563262660046602703973050798218246170835962005598561669706844469447435461092542265792444947706769615695252256130901271870341005768912974433684521436211263358097522726462083917939091760026658925757076733484173202927141441492573799914240222628795405623953109131594523623353044898339481494120112723445689647986475279242446083151413667587008191682564376412347964146113898565886683139407005941383669325997475076910488086663256335689181157957571445067490187939553165903773554290260531009121879044170766615232300936675369451260747671432073394867530820527479172464106442450727640226503746586340279816318821395210726268291535648506190714616083163403189943334431056876038286530365757187367147446004855912033137386225053275419626102417236133948503",
"1095121115716677802856811290392395128588168592409109494900178008967955253005183831872715423151551999734857184538199864469605657805519106717529655044054833197687459782636297255219742994736751541815269727940751860670268774903340296040006114013971309257028332849679096824800250742691718610670812374272414086863715763724622797509437062518082383056050144624962776302147890521249477060215148275163688301275847155316042279405557632639366066847442861422164832655874655824221577849928863023018366835675399949740429332468186340518172487073360822220449055340582568461568645259954873303616953776393853174845132081121976327462740354930744487429617202585015510744298530101547706821590188733515880733527449780963163909830077616357506845523215289297624086914545378511082534229620116563260168494523906566709418166011112754529766183554579321224940951177394088465596712620076240067370589036924024728375076210477267488679008016579588696191194060127319035195370137160936882402244399699172017835144537488486396906144217720028992863941288217185353914991583400421682751000603596655790990815525126154394344641336397793791497068253936771017031980867706707490224041075826337383538651825493679503771934836094655802776331664261631740148281763487765852746577808019633679",
/* generic unrestricted moduli */
"17933601194860113372237070562165128350027320072176844226673287945873370751245439587792371960615073855669274087805055507977323024886880985062002853331424203",
"2893527720709661239493896562339544088620375736490408468011883030469939904368086092336458298221245707898933583190713188177399401852627749210994595974791782790253946539043962213027074922559572312141181787434278708783207966459019479487",
"347743159439876626079252796797422223177535447388206607607181663903045907591201940478223621722118173270898487582987137708656414344685816179420855160986340457973820182883508387588163122354089264395604796675278966117567294812714812796820596564876450716066283126720010859041484786529056457896367683122960411136319",
"47266428956356393164697365098120418976400602706072312735924071745438532218237979333351774907308168340693326687317443721193266215155735814510792148768576498491199122744351399489453533553203833318691678263241941706256996197460424029012419012634671862283532342656309677173602509498417976091509154360039893165037637034737020327399910409885798185771003505320583967737293415979917317338985837385734747478364242020380416892056650841470869294527543597349250299539682430605173321029026555546832473048600327036845781970289288898317888427517364945316709081173840186150794397479045034008257793436817683392375274635794835245695887",
"436463808505957768574894870394349739623346440601945961161254440072143298152040105676491048248110146278752857839930515766167441407021501229924721335644557342265864606569000117714935185566842453630868849121480179691838399545644365571106757731317371758557990781880691336695584799313313687287468894148823761785582982549586183756806449017542622267874275103877481475534991201849912222670102069951687572917937634467778042874315463238062009202992087620963771759666448266532858079402669920025224220613419441069718482837399612644978839925207109870840278194042158748845445131729137117098529028886770063736487420613144045836803985635654192482395882603511950547826439092832800532152534003936926017612446606135655146445620623395788978726744728503058670046885876251527122350275750995227",
"11424167473351836398078306042624362277956429440521137061889702611766348760692206243140413411077394583180726863277012016602279290144126785129569474909173584789822341986742719230331946072730319555984484911716797058875905400999504305877245849119687509023232790273637466821052576859232452982061831009770786031785669030271542286603956118755585683996118896215213488875253101894663403069677745948305893849505434201763745232895780711972432011344857521691017896316861403206449421332243658855453435784006517202894181640562433575390821384210960117518650374602256601091379644034244332285065935413233557998331562749140202965844219336298970011513882564935538704289446968322281451907487362046511461221329799897350993370560697505809686438782036235372137015731304779072430260986460269894522159103008260495503005267165927542949439526272736586626709581721032189532726389643625590680105784844246152702670169304203783072275089194754889511973916207",
"1214855636816562637502584060163403830270705000634713483015101384881871978446801224798536155406895823305035467591632531067547890948695117172076954220727075688048751022421198712032848890056357845974246560748347918630050853933697792254955890439720297560693579400297062396904306270145886830719309296352765295712183040773146419022875165382778007040109957609739589875590885701126197906063620133954893216612678838507540777138437797705602453719559017633986486649523611975865005712371194067612263330335590526176087004421363598470302731349138773205901447704682181517904064735636518462452242791676541725292378925568296858010151852326316777511935037531017413910506921922450666933202278489024521263798482237150056835746454842662048692127173834433089016107854491097456725016327709663199738238442164843147132789153725513257167915555162094970853584447993125488607696008169807374736711297007473812256272245489405898470297178738029484459690836250560495461579533254473316340608217876781986188705928270735695752830825527963838355419762516246028680280988020401914551825487349990306976304093109384451438813251211051597392127491464898797406789175453067960072008590614886532333015881171367104445044718144312416815712216611576221546455968770801413440778423979",
NULL
};
log = fopen("logs/expt.log", "w");
logb = fopen("logs/expt_dr.log", "w");
logc = fopen("logs/expt_2k.log", "w");
for (n = 0; primes[n]; n++) {
SLEEP;
mp_read_radix(&a, primes[n], 10);
mp_zero(&b);
for (rr = 0; rr < mp_count_bits(&a); rr++) {
mp_mul_2(&b, &b);
b.dp[0] |= lbit();
b.used += 1;
}
mp_sub_d(&a, 1, &c);
mp_mod(&b, &c, &b);
mp_set(&c, 3);
reset();
rr = 0;
do {
DO(mp_exptmod(&c, &b, &a, &d));
rr += 16;
} while (rdtsc() < (CLK_PER_SEC * 2));
tt = rdtsc();
mp_sub_d(&a, 1, &e);
mp_sub(&e, &b, &b);
mp_exptmod(&c, &b, &a, &e); /* c^(p-1-b) mod a */
mp_mulmod(&e, &d, &a, &d); /* c^b * c^(p-1-b) == c^p-1 == 1 */
if (mp_cmp_d(&d, 1)) {
printf("Different (%d)!!!\n", mp_count_bits(&a));
draw(&d);
exit(0);
}
printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
fprintf((n < 6) ? logc : (n < 13) ? logb : log, "%d %9llu\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt);
}
}
fclose(log);
fclose(logb);
fclose(logc);
log = fopen("logs/invmod.log", "w");
for (cnt = 4; cnt <= 128; cnt += 4) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
do {
mp_add_d(&b, 1, &b);
mp_gcd(&a, &b, &c);
} while (mp_cmp_d(&c, 1) != MP_EQ);
reset();
rr = 0;
do {
DO(mp_invmod(&b, &a, &c));
rr += 16;
} while (rdtsc() < (CLK_PER_SEC * 2));
tt = rdtsc();
mp_mulmod(&b, &c, &a, &d);
if (mp_cmp_d(&d, 1) != MP_EQ) {
printf("Failed to invert\n");
return 0;
}
printf("Inverting mod\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((ulong64)rr)*CLK_PER_SEC)/tt);
}
fclose(log);
return 0;
#endif
div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n = div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n =
sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = add_d_n = sub_d_n= 0; sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = add_d_n = sub_d_n= 0;

291
demo/timing.c Normal file
View File

@ -0,0 +1,291 @@
#include <tommath.h>
#include <time.h>
/* NOTE(review): _tt is not referenced in the visible part of this file -- confirm it is still needed. */
ulong64 _tt;
/*
 * SLEEP is placed at the top of each benchmark iteration in main().
 * With IOWNANATHLON defined it expands to sleep(4); otherwise it expands
 * to nothing, so the benchmarks run back to back.
 *
 * NOTE(review): main() also calls sleep(1) directly, but <unistd.h> is only
 * included when IOWNANATHLON is defined -- confirm sleep() is declared in
 * the default build.
 */
#ifdef IOWNANATHLON
#include <unistd.h>
#define SLEEP sleep(4)
#else
#define SLEEP
#endif
/*
 * Print a labelled mp_int on one line of stdout in the form
 * "<name>: <digits>", where the digits are produced by mp_toradix()
 * using radix 64.
 */
void ndraw(mp_int *a, char *name)
{
   char digits[4096];   /* fixed-size scratch buffer for the radix-64 text */

   mp_toradix(a, digits, 64);
   printf("%s: %s\n", name, digits);
}
/* Print an mp_int through ndraw() with an empty label. */
static void draw(mp_int *a)
{
   ndraw(a, "");
}
/* State of the bit generator below; only the low 32 bits are used. */
unsigned long lfsr = 0xAAAAAAAAUL;

/*
 * Advance the 32-bit shift register by one step and return the bit that
 * was shifted out of position 31 (0 or 1).  When that bit is set, the
 * feedback constant 0x8000001B is XORed into the shifted state and the
 * result is truncated to 32 bits.
 */
int lbit(void)
{
   const int top = (lfsr & 0x80000000UL) != 0;
   unsigned long next = lfsr << 1;

   if (top) {
      next = (next ^ 0x8000001BUL) & 0xFFFFFFFFUL;
   }
   lfsr = next;
   return top;
}
#if defined(__i386__) || defined(_M_IX86) || defined(_M_AMD64)
/* RDTSC from Scott Duplichan */
static ulong64 TIMFUNC (void)
{
#if defined __GNUC__
#ifdef __i386__
ulong64 a;
__asm__ __volatile__ ("rdtsc ":"=A" (a));
return a;
#else /* gcc-IA64 version */
unsigned long result;
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
while (__builtin_expect ((int) result == -1, 0))
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
return result;
#endif
// Microsoft and Intel Windows compilers
#elif defined _M_IX86
__asm rdtsc
#elif defined _M_AMD64
return __rdtsc ();
#elif defined _M_IA64
#if defined __INTEL_COMPILER
#include <ia64intrin.h>
#endif
return __getReg (3116);
#else
#error need rdtsc function for this build
#endif
}
#else
#define TIMFUNC clock
#endif
/*
 * Execute statement x exactly twice.
 *
 * The timing loops in main() halve the measured interval (">> 1") to get
 * the cost of a single call, so the repeat count here must stay at two.
 *
 * The body is wrapped in do { } while (0) so that "DO(...);" is a single
 * statement: it stays fully guarded under an unbraced if and can be
 * followed by else.
 */
#define DO(x) do { x; x; } while (0)
int main(void)
{
ulong64 tt, gg, CLK_PER_SEC;
FILE *log, *logb, *logc;
mp_int a, b, c, d, e, f;
int n, cnt, ix, old_kara_m, old_kara_s;
unsigned rr;
mp_init(&a);
mp_init(&b);
mp_init(&c);
mp_init(&d);
mp_init(&e);
mp_init(&f);
srand(time(NULL));
/* temp. turn off TOOM */
TOOM_MUL_CUTOFF = TOOM_SQR_CUTOFF = 100000;
CLK_PER_SEC = TIMFUNC();
sleep(1);
CLK_PER_SEC = TIMFUNC() - CLK_PER_SEC;
printf("CLK_PER_SEC == %llu\n", CLK_PER_SEC);
log = fopen("logs/add.log", "w");
for (cnt = 8; cnt <= 128; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_add(&a,&b,&c));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 100000);
printf("Adding\t\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, tt); fflush(log);
}
fclose(log);
log = fopen("logs/sub.log", "w");
for (cnt = 8; cnt <= 128; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_sub(&a,&b,&c));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 100000);
printf("Subtracting\t\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, tt); fflush(log);
}
fclose(log);
/* do mult/square twice, first without karatsuba and second with */
old_kara_m = KARATSUBA_MUL_CUTOFF;
old_kara_s = KARATSUBA_SQR_CUTOFF;
for (ix = 0; ix < 1; ix++) {
printf("With%s Karatsuba\n", (ix==0)?"out":"");
KARATSUBA_MUL_CUTOFF = (ix==0)?9999:old_kara_m;
KARATSUBA_SQR_CUTOFF = (ix==0)?9999:old_kara_s;
log = fopen((ix==0)?"logs/mult.log":"logs/mult_kara.log", "w");
for (cnt = 32; cnt <= 288; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_mul(&a, &b, &c));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 100);
printf("Multiplying\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf(log, "%d %9llu\n", mp_count_bits(&a), tt); fflush(log);
}
fclose(log);
log = fopen((ix==0)?"logs/sqr.log":"logs/sqr_kara.log", "w");
for (cnt = 32; cnt <= 288; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_sqr(&a, &b));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 100);
printf("Squaring\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf(log, "%d %9llu\n", mp_count_bits(&a), tt); fflush(log);
}
fclose(log);
}
{
char *primes[] = {
/* 2K moduli mersenne primes */
"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151",
"531137992816767098689588206552468627329593117727031923199444138200403559860852242739162502265229285668889329486246501015346579337652707239409519978766587351943831270835393219031728127",
"10407932194664399081925240327364085538615262247266704805319112350403608059673360298012239441732324184842421613954281007791383566248323464908139906605677320762924129509389220345773183349661583550472959420547689811211693677147548478866962501384438260291732348885311160828538416585028255604666224831890918801847068222203140521026698435488732958028878050869736186900714720710555703168729087",
"1475979915214180235084898622737381736312066145333169775147771216478570297878078949377407337049389289382748507531496480477281264838760259191814463365330269540496961201113430156902396093989090226259326935025281409614983499388222831448598601834318536230923772641390209490231836446899608210795482963763094236630945410832793769905399982457186322944729636418890623372171723742105636440368218459649632948538696905872650486914434637457507280441823676813517852099348660847172579408422316678097670224011990280170474894487426924742108823536808485072502240519452587542875349976558572670229633962575212637477897785501552646522609988869914013540483809865681250419497686697771007",
"259117086013202627776246767922441530941818887553125427303974923161874019266586362086201209516800483406550695241733194177441689509238807017410377709597512042313066624082916353517952311186154862265604547691127595848775610568757931191017711408826252153849035830401185072116424747461823031471398340229288074545677907941037288235820705892351068433882986888616658650280927692080339605869308790500409503709875902119018371991620994002568935113136548829739112656797303241986517250116412703509705427773477972349821676443446668383119322540099648994051790241624056519054483690809616061625743042361721863339415852426431208737266591962061753535748892894599629195183082621860853400937932839420261866586142503251450773096274235376822938649407127700846077124211823080804139298087057504713825264571448379371125032081826126566649084251699453951887789613650248405739378594599444335231188280123660406262468609212150349937584782292237144339628858485938215738821232393687046160677362909315071",
"190797007524439073807468042969529173669356994749940177394741882673528979787005053706368049835514900244303495954950709725762186311224148828811920216904542206960744666169364221195289538436845390250168663932838805192055137154390912666527533007309292687539092257043362517857366624699975402375462954490293259233303137330643531556539739921926201438606439020075174723029056838272505051571967594608350063404495977660656269020823960825567012344189908927956646011998057988548630107637380993519826582389781888135705408653045219655801758081251164080554609057468028203308718724654081055323215860189611391296030471108443146745671967766308925858547271507311563765171008318248647110097614890313562856541784154881743146033909602737947385055355960331855614540900081456378659068370317267696980001187750995491090350108417050917991562167972281070161305972518044872048331306383715094854938415738549894606070722584737978176686422134354526989443028353644037187375385397838259511833166416134323695660367676897722287918773420968982326089026150031515424165462111337527431154890666327374921446276833564519776797633875503548665093914556482031482248883127023777039667707976559857333357013727342079099064400455741830654320379350833236245819348824064783585692924881021978332974949906122664421376034687815350484991",
/* DR moduli */
"14059105607947488696282932836518693308967803494693489478439861164411992439598399594747002144074658928593502845729752797260025831423419686528151609940203368612079",
"101745825697019260773923519755878567461315282017759829107608914364075275235254395622580447400994175578963163918967182013639660669771108475957692810857098847138903161308502419410142185759152435680068435915159402496058513611411688900243039",
"736335108039604595805923406147184530889923370574768772191969612422073040099331944991573923112581267542507986451953227192970402893063850485730703075899286013451337291468249027691733891486704001513279827771740183629161065194874727962517148100775228363421083691764065477590823919364012917984605619526140821797602431",
"38564998830736521417281865696453025806593491967131023221754800625044118265468851210705360385717536794615180260494208076605798671660719333199513807806252394423283413430106003596332513246682903994829528690198205120921557533726473585751382193953592127439965050261476810842071573684505878854588706623484573925925903505747545471088867712185004135201289273405614415899438276535626346098904241020877974002916168099951885406379295536200413493190419727789712076165162175783",
"542189391331696172661670440619180536749994166415993334151601745392193484590296600979602378676624808129613777993466242203025054573692562689251250471628358318743978285860720148446448885701001277560572526947619392551574490839286458454994488665744991822837769918095117129546414124448777033941223565831420390846864429504774477949153794689948747680362212954278693335653935890352619041936727463717926744868338358149568368643403037768649616778526013610493696186055899318268339432671541328195724261329606699831016666359440874843103020666106568222401047720269951530296879490444224546654729111504346660859907296364097126834834235287147",
"1487259134814709264092032648525971038895865645148901180585340454985524155135260217788758027400478312256339496385275012465661575576202252063145698732079880294664220579764848767704076761853197216563262660046602703973050798218246170835962005598561669706844469447435461092542265792444947706769615695252256130901271870341005768912974433684521436211263358097522726462083917939091760026658925757076733484173202927141441492573799914240222628795405623953109131594523623353044898339481494120112723445689647986475279242446083151413667587008191682564376412347964146113898565886683139407005941383669325997475076910488086663256335689181157957571445067490187939553165903773554290260531009121879044170766615232300936675369451260747671432073394867530820527479172464106442450727640226503746586340279816318821395210726268291535648506190714616083163403189943334431056876038286530365757187367147446004855912033137386225053275419626102417236133948503",
"1095121115716677802856811290392395128588168592409109494900178008967955253005183831872715423151551999734857184538199864469605657805519106717529655044054833197687459782636297255219742994736751541815269727940751860670268774903340296040006114013971309257028332849679096824800250742691718610670812374272414086863715763724622797509437062518082383056050144624962776302147890521249477060215148275163688301275847155316042279405557632639366066847442861422164832655874655824221577849928863023018366835675399949740429332468186340518172487073360822220449055340582568461568645259954873303616953776393853174845132081121976327462740354930744487429617202585015510744298530101547706821590188733515880733527449780963163909830077616357506845523215289297624086914545378511082534229620116563260168494523906566709418166011112754529766183554579321224940951177394088465596712620076240067370589036924024728375076210477267488679008016579588696191194060127319035195370137160936882402244399699172017835144537488486396906144217720028992863941288217185353914991583400421682751000603596655790990815525126154394344641336397793791497068253936771017031980867706707490224041075826337383538651825493679503771934836094655802776331664261631740148281763487765852746577808019633679",
/* generic unrestricted moduli */
"17933601194860113372237070562165128350027320072176844226673287945873370751245439587792371960615073855669274087805055507977323024886880985062002853331424203",
"2893527720709661239493896562339544088620375736490408468011883030469939904368086092336458298221245707898933583190713188177399401852627749210994595974791782790253946539043962213027074922559572312141181787434278708783207966459019479487",
"347743159439876626079252796797422223177535447388206607607181663903045907591201940478223621722118173270898487582987137708656414344685816179420855160986340457973820182883508387588163122354089264395604796675278966117567294812714812796820596564876450716066283126720010859041484786529056457896367683122960411136319",
"47266428956356393164697365098120418976400602706072312735924071745438532218237979333351774907308168340693326687317443721193266215155735814510792148768576498491199122744351399489453533553203833318691678263241941706256996197460424029012419012634671862283532342656309677173602509498417976091509154360039893165037637034737020327399910409885798185771003505320583967737293415979917317338985837385734747478364242020380416892056650841470869294527543597349250299539682430605173321029026555546832473048600327036845781970289288898317888427517364945316709081173840186150794397479045034008257793436817683392375274635794835245695887",
"436463808505957768574894870394349739623346440601945961161254440072143298152040105676491048248110146278752857839930515766167441407021501229924721335644557342265864606569000117714935185566842453630868849121480179691838399545644365571106757731317371758557990781880691336695584799313313687287468894148823761785582982549586183756806449017542622267874275103877481475534991201849912222670102069951687572917937634467778042874315463238062009202992087620963771759666448266532858079402669920025224220613419441069718482837399612644978839925207109870840278194042158748845445131729137117098529028886770063736487420613144045836803985635654192482395882603511950547826439092832800532152534003936926017612446606135655146445620623395788978726744728503058670046885876251527122350275750995227",
"11424167473351836398078306042624362277956429440521137061889702611766348760692206243140413411077394583180726863277012016602279290144126785129569474909173584789822341986742719230331946072730319555984484911716797058875905400999504305877245849119687509023232790273637466821052576859232452982061831009770786031785669030271542286603956118755585683996118896215213488875253101894663403069677745948305893849505434201763745232895780711972432011344857521691017896316861403206449421332243658855453435784006517202894181640562433575390821384210960117518650374602256601091379644034244332285065935413233557998331562749140202965844219336298970011513882564935538704289446968322281451907487362046511461221329799897350993370560697505809686438782036235372137015731304779072430260986460269894522159103008260495503005267165927542949439526272736586626709581721032189532726389643625590680105784844246152702670169304203783072275089194754889511973916207",
"1214855636816562637502584060163403830270705000634713483015101384881871978446801224798536155406895823305035467591632531067547890948695117172076954220727075688048751022421198712032848890056357845974246560748347918630050853933697792254955890439720297560693579400297062396904306270145886830719309296352765295712183040773146419022875165382778007040109957609739589875590885701126197906063620133954893216612678838507540777138437797705602453719559017633986486649523611975865005712371194067612263330335590526176087004421363598470302731349138773205901447704682181517904064735636518462452242791676541725292378925568296858010151852326316777511935037531017413910506921922450666933202278489024521263798482237150056835746454842662048692127173834433089016107854491097456725016327709663199738238442164843147132789153725513257167915555162094970853584447993125488607696008169807374736711297007473812256272245489405898470297178738029484459690836250560495461579533254473316340608217876781986188705928270735695752830825527963838355419762516246028680280988020401914551825487349990306976304093109384451438813251211051597392127491464898797406789175453067960072008590614886532333015881171367104445044718144312416815712216611576221546455968770801413440778423979",
NULL
};
log = fopen("logs/expt.log", "w");
logb = fopen("logs/expt_dr.log", "w");
logc = fopen("logs/expt_2k.log", "w");
for (n = 0; primes[n]; n++) {
SLEEP;
mp_read_radix(&a, primes[n], 10);
mp_zero(&b);
for (rr = 0; rr < (unsigned)mp_count_bits(&a); rr++) {
mp_mul_2(&b, &b);
b.dp[0] |= lbit();
b.used += 1;
}
mp_sub_d(&a, 1, &c);
mp_mod(&b, &c, &b);
mp_set(&c, 3);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_exptmod(&c, &b, &a, &d));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 10);
mp_sub_d(&a, 1, &e);
mp_sub(&e, &b, &b);
mp_exptmod(&c, &b, &a, &e); /* c^(p-1-b) mod a */
mp_mulmod(&e, &d, &a, &d); /* c^b * c^(p-1-b) == c^p-1 == 1 */
if (mp_cmp_d(&d, 1)) {
printf("Different (%d)!!!\n", mp_count_bits(&a));
draw(&d);
exit(0);
}
printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf((n < 6) ? logc : (n < 13) ? logb : log, "%d %9llu\n", mp_count_bits(&a), tt);
}
}
fclose(log);
fclose(logb);
fclose(logc);
log = fopen("logs/invmod.log", "w");
for (cnt = 4; cnt <= 128; cnt += 4) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
do {
mp_add_d(&b, 1, &b);
mp_gcd(&a, &b, &c);
} while (mp_cmp_d(&c, 1) != MP_EQ);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_invmod(&b, &a, &c));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 1000);
mp_mulmod(&b, &c, &a, &d);
if (mp_cmp_d(&d, 1) != MP_EQ) {
printf("Failed to invert\n");
return 0;
}
printf("Inverting mod\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, tt);
}
fclose(log);
return 0;
}

View File

@ -46,4 +46,5 @@ mont: mont.o
clean: clean:
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime pprime.dat rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime pprime.dat \
*.da *.dyn *.dpi *~

67
etc/makefile.icc Normal file
View File

@ -0,0 +1,67 @@
CC = icc
CFLAGS += -I../
# optimize for SPEED
#
# -mcpu= can be pentium, pentiumpro (covers PII through PIII) or pentium4
# -ax? specifies make code specifically for ? but compatible with IA-32
# -x? specifies compile solely for ? [not specifically IA-32 compatible]
#
# where ? is
# K - PIII
# W - first P4 [Willamette]
# N - P4 Northwood
# P - P4 Prescott
# B - Blend of P4 and PM [mobile]
#
# Default to just generic max opts
CFLAGS += -O3 -xN -ip
# default lib name (requires install with root)
# LIBNAME=-ltommath
# libname when you can't install the lib with install
LIBNAME=../libtommath.a
#provable primes
pprime: pprime.o
$(CC) pprime.o $(LIBNAME) -o pprime
# portable [well requires clock()] tuning app
tune: tune.o
$(CC) tune.o $(LIBNAME) -o tune
# same app but using RDTSC for higher precision [requires 80586+], coff based gcc installs [e.g. ming, cygwin, djgpp]
tune86: tune.c
nasm -f coff timer.asm
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86
# for cygwin
tune86c: tune.c
nasm -f gnuwin32 timer.asm
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86
#make tune86 for linux or any ELF format
tune86l: tune.c
nasm -f elf -DUSE_ELF timer.asm
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86l
# spits out mersenne primes
mersenne: mersenne.o
$(CC) mersenne.o $(LIBNAME) -o mersenne
# finds DR safe primes for the given config
drprime: drprime.o
$(CC) drprime.o $(LIBNAME) -o drprime
# finds 2k safe primes for the given config
2kprime: 2kprime.o
$(CC) 2kprime.o $(LIBNAME) -o 2kprime
mont: mont.o
$(CC) mont.o $(LIBNAME) -o mont
clean:
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime pprime.dat *.il

View File

@ -1,16 +1,16 @@
224 20297071 224 1572
448 15151383 448 1740
672 13088682 672 1902
896 11111587 896 2116
1120 9240621 1120 2324
1344 8221878 1344 2484
1568 7227434 1568 2548
1792 6718051 1792 2772
2016 6042524 2016 2958
2240 5685200 2240 3058
2464 5240465 2464 3276
2688 4818032 2688 3436
2912 4412794 2912 3542
3136 4155883 3136 3702
3360 3927078 3360 3926
3584 3722138 3584 4074

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.9 KiB

After

Width:  |  Height:  |  Size: 6.1 KiB

View File

@ -1,7 +1,7 @@
513 745 513 19933908
769 282 769 55707832
1025 130 1025 119872576
2049 20 2049 856114218
2561 11 2561 1602741360
3073 6 3073 2718192748
4097 2 4097 6264335828

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.3 KiB

After

Width:  |  Height:  |  Size: 6.5 KiB

View File

@ -1,6 +1,6 @@
521 783 521 18847776
607 585 607 24665920
1279 138 1279 110036220
2203 39 2203 414562036
3217 15 3217 1108350966
4253 6 4253 2286079370

View File

@ -1,7 +1,7 @@
532 1296 532 9656134
784 551 784 23022274
1036 283 1036 45227854
1540 109 1540 129652848
2072 52 2072 280625626
3080 18 3080 845619480
4116 7 4116 1866206400

View File

@ -1,17 +1,17 @@
set terminal png set terminal png
set size 1.75 set size 1.75
set ylabel "Operations per Second" set ylabel "Cycles per Operation"
set xlabel "Operand size (bits)" set xlabel "Operand size (bits)"
set output "addsub.png" set output "addsub.png"
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction" plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
set output "mult.png" set output "mult.png"
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)" plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
set output "expt.png" set output "expt.png"
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)", 'expt_2k.log' smooth bezier title "Exptmod (2k Reduction)" plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)", 'expt_2k.log' smooth bezier title "Exptmod (2k Reduction)"
set output "invmod.png" set output "invmod.png"
plot 'invmod.log' smooth bezier title "Modular Inverse" plot 'invmod.log' smooth bezier title "Modular Inverse"

View File

@ -1,32 +0,0 @@
112 17364
224 8643
336 8867
448 6228
560 4737
672 2259
784 2899
896 1497
1008 1238
1120 1010
1232 870
1344 1265
1456 1102
1568 981
1680 539
1792 484
1904 722
2016 392
2128 604
2240 551
2352 511
2464 469
2576 263
2688 247
2800 227
2912 354
3024 336
3136 312
3248 296
3360 166
3472 155
3584 248

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.6 KiB

After

Width:  |  Height:  |  Size: 4.8 KiB

View File

@ -1,13 +0,0 @@
To use the pretty graphs you have to first build/run the ltmtest from the root directory of the package.
To do this, type
make timing ; ltmtest
in the root. It will run for a while [about ten minutes on most PCs] and produce a series of .log files in logs/.
After doing that, run "gnuplot graphs.dem" to make the PNGs. If you have managed to do all that so far, just open index.html to view
them all :-)
Have fun
Tom

View File

@ -1,16 +0,0 @@
224 11069160
448 9156136
672 8089755
896 7399424
1120 6389352
1344 5818648
1568 5257112
1792 4982160
2016 4527856
2240 4325312
2464 4051760
2688 3767640
2912 3612520
3136 3415208
3360 3258656
3584 3113360

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.8 KiB

View File

@ -1,7 +0,0 @@
513 664
769 256
1025 117
2049 17
2561 9
3073 5
4097 2

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.3 KiB

View File

@ -1,7 +0,0 @@
532 1088
784 460
1036 240
1540 92
2072 43
3080 15
4116 6

View File

@ -1,17 +0,0 @@
set terminal png color
set size 1.75
set ylabel "Operations per Second"
set xlabel "Operand size (bits)"
set output "addsub.png"
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
set output "mult.png"
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
set output "expt.png"
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)"
set output "invmod.png"
plot 'invmod.log' smooth bezier title "Modular Inverse"

View File

@ -1,24 +0,0 @@
<html>
<head>
<title>LibTomMath Log Plots</title>
</head>
<body>
<h1>Addition and Subtraction</h1>
<center><img src=addsub.png></center>
<hr>
<h1>Multipliers</h1>
<center><img src=mult.png></center>
<hr>
<h1>Exptmod</h1>
<center><img src=expt.png></center>
<hr>
<h1>Modular Inverse</h1>
<center><img src=invmod.png></center>
<hr>
</body>
</html>

View File

@ -1,32 +0,0 @@
112 16248
224 8192
336 5320
448 3560
560 2728
672 2064
784 1704
896 2176
1008 1184
1120 976
1232 1280
1344 1176
1456 624
1568 912
1680 504
1792 452
1904 658
2016 608
2128 336
2240 312
2352 288
2464 264
2576 408
2688 376
2800 354
2912 198
3024 307
3136 173
3248 162
3360 256
3472 145
3584 226

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.6 KiB

View File

@ -1,17 +0,0 @@
896 322904
1344 151592
1792 90472
2240 59984
2688 42624
3136 31872
3584 24704
4032 19704
4480 16096
4928 13376
5376 11272
5824 9616
6272 8360
6720 7304
7168 1664
7616 1472
8064 1328

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.1 KiB

View File

@ -1,17 +0,0 @@
896 322872
1344 151688
1792 90480
2240 59984
2688 42656
3136 32144
3584 25840
4032 21328
4480 17856
4928 14928
5376 12856
5824 11256
6272 9880
6720 8984
7168 7928
7616 7200
8064 6576

View File

@ -1,17 +0,0 @@
896 415472
1344 223736
1792 141232
2240 97624
2688 71400
3136 54800
3584 16904
4032 13528
4480 10968
4928 9128
5376 7784
5824 6672
6272 5760
6720 5056
7168 4440
7616 3952
8064 3512

View File

@ -1,17 +0,0 @@
896 420464
1344 224800
1792 142808
2240 97704
2688 71416
3136 54504
3584 38320
4032 32360
4480 27576
4928 23840
5376 20688
5824 18264
6272 16176
6720 14440
7168 11688
7616 10752
8064 9936

View File

@ -1,16 +0,0 @@
224 9728504
448 8573648
672 7488096
896 6714064
1120 5950472
1344 5457400
1568 5038896
1792 4683632
2016 4384656
2240 4105976
2464 3871608
2688 3650680
2912 3463552
3136 3290016
3360 3135272
3584 2993848

View File

@ -1,33 +1,33 @@
920 374785 923 45612
1142 242737 1143 68010
1371 176704 1370 94894
1596 134341 1596 126514
1816 105537 1820 163014
2044 85089 2044 203564
2268 70051 2268 249156
2490 58671 2492 299226
2716 49851 2716 354138
2937 42881 2940 413022
3162 37288 3163 477406
3387 32697 3387 545876
3608 28915 3612 619044
3836 25759 3835 696754
4057 23088 4060 779174
4284 20800 4284 866216
4508 18827 4508 958100
4730 17164 4731 1055898
4956 15689 4954 1162294
5180 14397 5179 1267654
5398 13260 5404 1377572
5628 12249 5628 1503736
5852 11346 5852 1622310
6071 10537 6076 1746624
6298 9812 6299 1875390
6522 9161 6524 2009086
6742 8572 6748 2145990
6971 8038 6971 2289044
7195 2915 7196 2891644
7419 2744 7418 3064792
7644 2587 7644 3249780
7866 2444 7868 3455868
8090 2311 8092 3644238

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.9 KiB

After

Width:  |  Height:  |  Size: 6.6 KiB

View File

@ -1,33 +1,33 @@
924 374171 921 92388
1147 243163 1148 61410
1371 177111 1372 43799
1596 134465 1594 33047
1819 105619 1819 26913
2044 85145 2043 21996
2266 70086 2268 18453
2488 58717 2492 15623
2715 49869 2715 13378
2939 42894 2940 11626
3164 37389 3164 10252
3387 33510 3385 9291
3610 29993 3610 8348
3836 27205 3835 7615
4060 24751 4060 6928
4281 22576 4283 6401
4508 20670 4508 5836
4732 19019 4732 5387
4954 17527 4955 4985
5180 16217 5178 4614
5404 15044 5404 4300
5624 14003 5622 4005
5849 13051 5852 3742
6076 12067 6073 3502
6300 11438 6298 3262
6524 10772 6524 3137
6748 10298 6748 2967
6972 9715 6971 2807
7195 9330 7195 2679
7416 8836 7420 2571
7644 8465 7643 2442
7864 8042 7867 2324
8091 7735 8091 2235

View File

@ -1,13 +0,0 @@
To use the pretty graphs you have to first build/run the ltmtest from the root directory of the package.
To do this, type
make timing ; ltmtest
in the root. It will run for a while [about ten minutes on most PCs] and produce a series of .log files in logs/.
After doing that, run "gnuplot graphs.dem" to make the PNGs. If you have managed to do all that so far, just open index.html to view
them all :-)
Have fun
Tom

View File

@ -1,16 +0,0 @@
224 8113248
448 6585584
672 5687678
896 4761144
1120 4111592
1344 3995154
1568 3532387
1792 3225400
2016 2963960
2240 2720112
2464 2533952
2688 2307168
2912 2287064
3136 2150160
3360 2035992
3584 1936304

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.7 KiB

View File

@ -1,7 +0,0 @@
513 195
769 68
1025 31
2049 4
2561 2
3073 1
4097 0

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.4 KiB

View File

@ -1,7 +0,0 @@
532 393
784 158
1036 79
1540 27
2072 12
3080 4
4116 1

View File

@ -1,17 +0,0 @@
set terminal png color
set size 1.75
set ylabel "Operations per Second"
set xlabel "Operand size (bits)"
set output "addsub.png"
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
set output "mult.png"
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
set output "expt.png"
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)"
set output "invmod.png"
plot 'invmod.log' smooth bezier title "Modular Inverse"

View File

@ -1,24 +0,0 @@
<html>
<head>
<title>LibTomMath Log Plots</title>
</head>
<body>
<h1>Addition and Subtraction</h1>
<center><img src=addsub.png></center>
<hr>
<h1>Multipliers</h1>
<center><img src=mult.png></center>
<hr>
<h1>Exptmod</h1>
<center><img src=expt.png></center>
<hr>
<h1>Modular Inverse</h1>
<center><img src=invmod.png></center>
<hr>
</body>
</html>

View File

@ -1,32 +0,0 @@
112 13608
224 6872
336 4264
448 2792
560 2144
672 1560
784 1296
896 1672
1008 896
1120 736
1232 1024
1344 888
1456 472
1568 680
1680 373
1792 328
1904 484
2016 436
2128 232
2240 211
2352 200
2464 177
2576 293
2688 262
2800 251
2912 137
3024 216
3136 117
3248 113
3360 181
3472 98
3584 158

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.5 KiB

View File

@ -1,17 +0,0 @@
896 77600
1344 35776
1792 19688
2240 13248
2688 9424
3136 7056
3584 5464
4032 4368
4480 3568
4928 2976
5376 2520
5824 2152
6272 1872
6720 1632
7168 650
7616 576
8064 515

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.9 KiB

View File

@ -1,17 +0,0 @@
896 77752
1344 35832
1792 19688
2240 14704
2688 10832
3136 8336
3584 6600
4032 5424
4480 4648
4928 3976
5376 3448
5824 3016
6272 2664
6720 2384
7168 2120
7616 1912
8064 1752

View File

@ -1,17 +0,0 @@
896 128088
1344 63640
1792 37968
2240 25488
2688 18176
3136 13672
3584 4920
4032 3912
4480 3160
4928 2616
5376 2216
5824 1896
6272 1624
6720 1408
7168 1240
7616 1096
8064 984

View File

@ -1,17 +0,0 @@
896 127456
1344 63752
1792 37920
2240 25440
2688 18200
3136 13728
3584 10968
4032 9072
4480 7608
4928 6440
5376 5528
5824 4768
6272 4328
6720 3888
7168 3504
7616 3176
8064 2896

View File

@ -1,16 +0,0 @@
224 7355896
448 6162880
672 5218984
896 4622776
1120 3999320
1344 3629480
1568 3290384
1792 2954752
2016 2737056
2240 2563320
2464 2451928
2688 2310920
2912 2139048
3136 2034080
3360 1890800
3584 1808624

View File

@ -1,33 +1,33 @@
922 471095 924 26026
1147 337137 1146 37682
1366 254327 1370 51714
1596 199732 1595 68130
1819 161225 1820 86850
2044 132852 2043 107880
2268 111493 2267 131236
2490 94864 2490 156828
2715 81745 2716 184704
2940 71187 2940 214934
3162 62575 3162 247424
3387 55418 3388 282494
3612 14540 3608 308390
3836 12944 3834 345978
4060 11627 4060 386156
4281 10546 4282 427648
4508 9502 4505 471556
4730 8688 4731 517948
4954 7937 4954 566396
5180 7273 5180 618292
5402 6701 5402 670130
5627 6189 5628 725674
5850 5733 5852 783310
6076 5310 6076 843480
6300 4933 6300 905136
6522 4631 6524 969132
6748 4313 6748 1033680
6971 4064 6971 1100912
7196 3801 7195 1170954
7420 3576 7420 1252576
7642 3388 7643 1325038
7868 3191 7867 1413890
8092 3020 8091 1493140

View File

@ -1,33 +1,33 @@
922 470930 923 165854
1148 337217 1146 112539
1372 254433 1372 80388
1596 199827 1595 60051
1820 161204 1820 47498
2043 132871 2044 38017
2267 111522 2268 31935
2488 94932 2492 27373
2714 81814 2714 23798
2939 71231 2939 20630
3164 62616 3164 18198
3385 55467 3388 16191
3611 44426 3612 14538
3836 40695 3836 13038
4060 37391 4058 11683
4283 34371 4284 10915
4508 31779 4508 9998
4732 29499 4731 9271
4956 27426 4954 8555
5177 25598 5180 7910
5403 23944 5404 7383
5628 22416 5628 7012
5851 21052 5852 6527
6076 19781 6075 6175
6299 18588 6299 5737
6523 17539 6524 5398
6746 16618 6744 5110
6972 15705 6971 4864
7196 13582 7196 4567
7420 13004 7420 4371
7643 12496 7644 4182
7868 11963 7868 3981
8092 11497 8092 3758

View File

@ -1,16 +1,16 @@
224 16370431 224 2012
448 13327848 448 2208
672 11009401 672 2366
896 9125342 896 2532
1120 7930419 1120 2682
1344 7114040 1344 2838
1568 6506998 1568 3016
1792 5899346 1792 3146
2016 5435327 2016 3318
2240 5038931 2240 3538
2464 4696364 2464 3756
2688 4425678 2688 3914
2912 4134476 2912 4060
3136 3913280 3136 4216
3360 3692536 3360 4392
3584 3505219 3584 4550

View File

@ -12,7 +12,10 @@ CFLAGS += -O3 -funroll-loops
#x86 optimizations [should be valid for any GCC install though] #x86 optimizations [should be valid for any GCC install though]
CFLAGS += -fomit-frame-pointer CFLAGS += -fomit-frame-pointer
VERSION=0.30 #debug
#CFLAGS += -g3
VERSION=0.31
default: libtommath.a default: libtommath.a
@ -20,7 +23,7 @@ default: libtommath.a
LIBNAME=libtommath.a LIBNAME=libtommath.a
HEADERS=tommath.h HEADERS=tommath.h
#LIBPATH-The directory for libtomcrypt to be installed to. #LIBPATH-The directory for libtommath to be installed to.
#INCPATH-The directory to install the header files for libtommath. #INCPATH-The directory to install the header files for libtommath.
#DATAPATH-The directory to install the pdf docs. #DATAPATH-The directory to install the pdf docs.
DESTDIR= DESTDIR=
@ -58,6 +61,30 @@ libtommath.a: $(OBJECTS)
$(AR) $(ARFLAGS) libtommath.a $(OBJECTS) $(AR) $(ARFLAGS) libtommath.a $(OBJECTS)
ranlib libtommath.a ranlib libtommath.a
#make a profiled library (takes a while!!!)
#
# This will build the library with profile generation
# then run the test demo and rebuild the library.
#
# So far I've seen improvements in the MP math
profiled:
make CFLAGS="$(CFLAGS) -fprofile-arcs -DTESTING" timing
./ltmtest
rm -f *.a *.o ltmtest
make CFLAGS="$(CFLAGS) -fbranch-probabilities"
#make a single object profiled library
profiled_single:
perl gen.pl
$(CC) $(CFLAGS) -fprofile-arcs -DTESTING -c mpi.c -o mpi.o
$(CC) $(CFLAGS) -DTESTING -DTIMER demo/timing.c mpi.o -o ltmtest
./ltmtest
rm -f *.o ltmtest
$(CC) $(CFLAGS) -fbranch-probabilities -DTESTING -c mpi.c -o mpi.o
$(AR) $(ARFLAGS) libtommath.a mpi.o
ranlib libtommath.a
install: libtommath.a install: libtommath.a
install -d -g root -o root $(DESTDIR)$(LIBPATH) install -d -g root -o root $(DESTDIR)$(LIBPATH)
install -d -g root -o root $(DESTDIR)$(INCPATH) install -d -g root -o root $(DESTDIR)$(INCPATH)
@ -71,7 +98,7 @@ mtest: test
cd mtest ; $(CC) $(CFLAGS) mtest.c -o mtest -s cd mtest ; $(CC) $(CFLAGS) mtest.c -o mtest -s
timing: libtommath.a timing: libtommath.a
$(CC) $(CFLAGS) -DTIMER demo/demo.c libtommath.a -o ltmtest -s $(CC) $(CFLAGS) -DTIMER demo/timing.c libtommath.a -o ltmtest -s
# makes the LTM book DVI file, requires tetex, perl and makeindex [part of tetex I think] # makes the LTM book DVI file, requires tetex, perl and makeindex [part of tetex I think]
docdvi: tommath.src docdvi: tommath.src
@ -106,10 +133,13 @@ mandvi: bn.tex
manual: mandvi manual: mandvi
pdflatex bn >/dev/null pdflatex bn >/dev/null
rm -f bn.aux bn.dvi bn.log bn.idx bn.lof bn.out bn.toc rm -f bn.aux bn.dvi bn.log bn.idx bn.lof bn.out bn.toc
pretty:
perl pretty.build
clean: clean:
rm -f *.bat *.pdf *.o *.a *.obj *.lib *.exe *.dll etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \ rm -f *.bat *.pdf *.o *.a *.obj *.lib *.exe *.dll etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \
*.idx *.toc *.log *.aux *.dvi *.lof *.ind *.ilg *.ps *.log *.s mpi.c *.idx *.toc *.log *.aux *.dvi *.lof *.ind *.ilg *.ps *.log *.s mpi.c *.da *.dyn *.dpi tommath.tex *~ demo/*~ etc/*~
cd etc ; make clean cd etc ; make clean
cd pics ; make clean cd pics ; make clean

View File

@ -30,7 +30,8 @@ bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj \
bn_mp_radix_smap.obj bn_mp_read_radix.obj bn_mp_toradix.obj bn_mp_radix_size.obj \ bn_mp_radix_smap.obj bn_mp_read_radix.obj bn_mp_toradix.obj bn_mp_radix_size.obj \
bn_mp_fread.obj bn_mp_fwrite.obj bn_mp_cnt_lsb.obj bn_error.obj \ bn_mp_fread.obj bn_mp_fwrite.obj bn_mp_cnt_lsb.obj bn_error.obj \
bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_prime_sizes_tab.obj bn_mp_exteuclid.obj bn_mp_toradix_n.obj \ bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_prime_sizes_tab.obj bn_mp_exteuclid.obj bn_mp_toradix_n.obj \
bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj \
bn_mp_init_set.obj bn_mp_init_set_int.obj
TARGET = libtommath.lib TARGET = libtommath.lib

View File

@ -35,7 +35,8 @@ bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \
bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \ bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \
bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \ bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \
bn_mp_init_multi.o bn_mp_clear_multi.o bn_prime_sizes_tab.o bn_mp_exteuclid.o bn_mp_toradix_n.o \ bn_mp_init_multi.o bn_mp_clear_multi.o bn_prime_sizes_tab.o bn_mp_exteuclid.o bn_mp_toradix_n.o \
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \
bn_mp_init_set_int.o
# make a Windows DLL via Cygwin # make a Windows DLL via Cygwin
windll: $(OBJECTS) windll: $(OBJECTS)

110
makefile.icc Normal file
View File

@ -0,0 +1,110 @@
#Makefile for ICC
#
#Tom St Denis
CC=icc
CFLAGS += -I./
# optimize for SPEED
#
# -mcpu= can be pentium, pentiumpro (covers PII through PIII) or pentium4
# -ax? specifies make code specifically for ? but compatible with IA-32
# -x? specifies compile solely for ? [not specifically IA-32 compatible]
#
# where ? is
# K - PIII
# W - first P4 [Willamette]
# N - P4 Northwood
# P - P4 Prescott
# B - Blend of P4 and PM [mobile]
#
# Default to just generic max opts
CFLAGS += -O3 -xN
default: libtommath.a
#default files to install
LIBNAME=libtommath.a
HEADERS=tommath.h
#LIBPATH-The directory for libtommath to be installed to.
#INCPATH-The directory to install the header files for libtommath.
#DATAPATH-The directory to install the pdf docs.
DESTDIR=
LIBPATH=/usr/lib
INCPATH=/usr/include
DATAPATH=/usr/share/doc/libtommath/pdf
OBJECTS=bncore.o bn_mp_init.o bn_mp_clear.o bn_mp_exch.o bn_mp_grow.o bn_mp_shrink.o \
bn_mp_clamp.o bn_mp_zero.o bn_mp_set.o bn_mp_set_int.o bn_mp_init_size.o bn_mp_copy.o \
bn_mp_init_copy.o bn_mp_abs.o bn_mp_neg.o bn_mp_cmp_mag.o bn_mp_cmp.o bn_mp_cmp_d.o \
bn_mp_rshd.o bn_mp_lshd.o bn_mp_mod_2d.o bn_mp_div_2d.o bn_mp_mul_2d.o bn_mp_div_2.o \
bn_mp_mul_2.o bn_s_mp_add.o bn_s_mp_sub.o bn_fast_s_mp_mul_digs.o bn_s_mp_mul_digs.o \
bn_fast_s_mp_mul_high_digs.o bn_s_mp_mul_high_digs.o bn_fast_s_mp_sqr.o bn_s_mp_sqr.o \
bn_mp_add.o bn_mp_sub.o bn_mp_karatsuba_mul.o bn_mp_mul.o bn_mp_karatsuba_sqr.o \
bn_mp_sqr.o bn_mp_div.o bn_mp_mod.o bn_mp_add_d.o bn_mp_sub_d.o bn_mp_mul_d.o \
bn_mp_div_d.o bn_mp_mod_d.o bn_mp_expt_d.o bn_mp_addmod.o bn_mp_submod.o \
bn_mp_mulmod.o bn_mp_sqrmod.o bn_mp_gcd.o bn_mp_lcm.o bn_fast_mp_invmod.o bn_mp_invmod.o \
bn_mp_reduce.o bn_mp_montgomery_setup.o bn_fast_mp_montgomery_reduce.o bn_mp_montgomery_reduce.o \
bn_mp_exptmod_fast.o bn_mp_exptmod.o bn_mp_2expt.o bn_mp_n_root.o bn_mp_jacobi.o bn_reverse.o \
bn_mp_count_bits.o bn_mp_read_unsigned_bin.o bn_mp_read_signed_bin.o bn_mp_to_unsigned_bin.o \
bn_mp_to_signed_bin.o bn_mp_unsigned_bin_size.o bn_mp_signed_bin_size.o \
bn_mp_xor.o bn_mp_and.o bn_mp_or.o bn_mp_rand.o bn_mp_montgomery_calc_normalization.o \
bn_mp_prime_is_divisible.o bn_prime_tab.o bn_mp_prime_fermat.o bn_mp_prime_miller_rabin.o \
bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o \
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o bn_mp_reduce_setup.o \
bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \
bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \
bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \
bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \
bn_mp_init_multi.o bn_mp_clear_multi.o bn_prime_sizes_tab.o bn_mp_exteuclid.o bn_mp_toradix_n.o \
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \
bn_mp_init_set_int.o
libtommath.a: $(OBJECTS)
$(AR) $(ARFLAGS) libtommath.a $(OBJECTS)
ranlib libtommath.a
#make a profiled library (takes a while!!!)
#
# This will build the library with profile generation
# then run the test demo and rebuild the library.
#
# So far I've seen improvements in the MP math
#
# $(MAKE) is used for the recursive invocations so the same make program
# (and its command-line flags) drives the sub-builds.
profiled:
	$(MAKE) -f makefile.icc CFLAGS="$(CFLAGS) -prof_gen -DTESTING" timing
	./ltmtest
	rm -f *.a *.o ltmtest
	$(MAKE) -f makefile.icc CFLAGS="$(CFLAGS) -prof_use"
#make a single object profiled library
#
# Same idea as "profiled" but on the single-file build: mpi.c is compiled
# with profile generation, the timing demo is run to collect a profile,
# and mpi.c is then recompiled using that profile.
# (mpi.c is presumably produced by gen.pl -- it is removed by "clean".)
#
# The training run is built from demo/timing.c, the same program the
# "timing" target builds.
profiled_single:
	perl gen.pl
	$(CC) $(CFLAGS) -prof_gen -DTESTING -c mpi.c -o mpi.o
	$(CC) $(CFLAGS) -DTESTING -DTIMER demo/timing.c mpi.o -o ltmtest
	./ltmtest
	rm -f *.o ltmtest
	$(CC) $(CFLAGS) -prof_use -ip -DTESTING -c mpi.c -o mpi.o
	$(AR) $(ARFLAGS) libtommath.a mpi.o
	ranlib libtommath.a
install: libtommath.a
install -d -g root -o root $(DESTDIR)$(LIBPATH)
install -d -g root -o root $(DESTDIR)$(INCPATH)
install -g root -o root $(LIBNAME) $(DESTDIR)$(LIBPATH)
install -g root -o root $(HEADERS) $(DESTDIR)$(INCPATH)
test: libtommath.a demo/demo.o
$(CC) demo/demo.o libtommath.a -o test
mtest: test
cd mtest ; $(CC) $(CFLAGS) mtest.c -o mtest
timing: libtommath.a
$(CC) $(CFLAGS) -DTIMER demo/timing.c libtommath.a -o ltmtest
clean:
rm -f *.bat *.pdf *.o *.a *.obj *.lib *.exe *.dll etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \
*.idx *.toc *.log *.aux *.dvi *.lof *.ind *.ilg *.ps *.log *.s mpi.c *.il etc/*.il *.dyn
cd etc ; make clean
cd pics ; make clean

View File

@ -29,7 +29,8 @@ bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj \
bn_mp_radix_smap.obj bn_mp_read_radix.obj bn_mp_toradix.obj bn_mp_radix_size.obj \ bn_mp_radix_smap.obj bn_mp_read_radix.obj bn_mp_toradix.obj bn_mp_radix_size.obj \
bn_mp_fread.obj bn_mp_fwrite.obj bn_mp_cnt_lsb.obj bn_error.obj \ bn_mp_fread.obj bn_mp_fwrite.obj bn_mp_cnt_lsb.obj bn_error.obj \
bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_prime_sizes_tab.obj bn_mp_exteuclid.obj bn_mp_toradix_n.obj \ bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_prime_sizes_tab.obj bn_mp_exteuclid.obj bn_mp_toradix_n.obj \
bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj \
bn_mp_init_set.obj bn_mp_init_set_int.obj
library: $(OBJECTS) library: $(OBJECTS)
lib /out:tommath.lib $(OBJECTS) lib /out:tommath.lib $(OBJECTS)

Binary file not shown.

View File

@ -452,7 +452,7 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
} }
/* setup dest */ /* setup dest */
olduse = c->used; olduse = c->used;
c->used = digs; c->used = digs;
{ {
@ -779,7 +779,7 @@ mp_2expt (mp_int * a, int b)
a->used = b / DIGIT_BIT + 1; a->used = b / DIGIT_BIT + 1;
/* put the single bit in its place */ /* put the single bit in its place */
a->dp[b / DIGIT_BIT] = 1 << (b % DIGIT_BIT); a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
return MP_OKAY; return MP_OKAY;
} }
@ -1142,10 +1142,14 @@ mp_clamp (mp_int * a)
void void
mp_clear (mp_int * a) mp_clear (mp_int * a)
{ {
int i;
/* only do anything if a hasn't been freed previously */ /* only do anything if a hasn't been freed previously */
if (a->dp != NULL) { if (a->dp != NULL) {
/* first zero the digits */ /* first zero the digits */
memset (a->dp, 0, sizeof (mp_digit) * a->used); for (i = 0; i < a->used; i++) {
a->dp[i] = 0;
}
/* free ram */ /* free ram */
XFREE(a->dp); XFREE(a->dp);
@ -1677,7 +1681,7 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
*/ */
/* get sign before writing to c */ /* get sign before writing to c */
x.sign = a->sign; x.sign = x.used == 0 ? MP_ZPOS : a->sign;
if (c != NULL) { if (c != NULL) {
mp_clamp (&q); mp_clamp (&q);
@ -3083,15 +3087,22 @@ int mp_grow (mp_int * a, int size)
*/ */
#include <tommath.h> #include <tommath.h>
/* init a new bigint */ /* init a new mp_int */
int mp_init (mp_int * a) int mp_init (mp_int * a)
{ {
int i;
/* allocate memory required and clear it */ /* allocate memory required and clear it */
a->dp = OPT_CAST(mp_digit) XCALLOC (sizeof (mp_digit), MP_PREC); a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
if (a->dp == NULL) { if (a->dp == NULL) {
return MP_MEM; return MP_MEM;
} }
/* set the digits to zero */
for (i = 0; i < MP_PREC; i++) {
a->dp[i] = 0;
}
/* set the used to zero, allocated digits to the default precision /* set the used to zero, allocated digits to the default precision
* and sign to positive */ * and sign to positive */
a->used = 0; a->used = 0;
@ -3753,9 +3764,6 @@ int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
goto X0Y0; goto X0Y0;
/* now shift the digits */ /* now shift the digits */
x0.sign = x1.sign = a->sign;
y0.sign = y1.sign = b->sign;
x0.used = y0.used = B; x0.used = y0.used = B;
x1.used = a->used - B; x1.used = a->used - B;
y1.used = b->used - B; y1.used = b->used - B;
@ -4484,7 +4492,7 @@ int mp_mul (mp_int * a, mp_int * b, mp_int * c)
res = s_mp_mul (a, b, c); res = s_mp_mul (a, b, c);
} }
} }
c->sign = neg; c->sign = (c->used > 0) ? neg : MP_ZPOS;
return res; return res;
} }
@ -6090,7 +6098,8 @@ mp_reduce_2k_setup(mp_int *a, mp_digit *d)
/* determines if mp_reduce_2k can be used */ /* determines if mp_reduce_2k can be used */
int mp_reduce_is_2k(mp_int *a) int mp_reduce_is_2k(mp_int *a)
{ {
int ix, iy, iz, iw; int ix, iy, iw;
mp_digit iz;
if (a->used == 0) { if (a->used == 0) {
return 0; return 0;
@ -6107,7 +6116,7 @@ int mp_reduce_is_2k(mp_int *a)
return 0; return 0;
} }
iz <<= 1; iz <<= 1;
if (iz > (int)MP_MASK) { if (iz > (mp_digit)MP_MASK) {
++iw; ++iw;
iz = 1; iz = 1;
} }
@ -8396,14 +8405,16 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
CPU /Compiler /MUL CUTOFF/SQR CUTOFF CPU /Compiler /MUL CUTOFF/SQR CUTOFF
------------------------------------------------------------- -------------------------------------------------------------
Intel P4 /GCC v3.2 / 70/ 108 Intel P4 Northwood /GCC v3.3.3 / 59/ 81/profiled build
AMD Athlon XP /GCC v3.2 / 109/ 127 Intel P4 Northwood /GCC v3.3.3 / 59/ 80/profiled_single build
Intel P4 Northwood /ICC v8.0 / 57/ 70/profiled build
Intel P4 Northwood /ICC v8.0 / 54/ 76/profiled_single build
AMD Athlon XP /GCC v3.2 / 109/ 127/
*/ */
/* configured for a AMD XP Thoroughbred core with etc/tune.c */ int KARATSUBA_MUL_CUTOFF = 57, /* Min. number of digits before Karatsuba multiplication is used. */
int KARATSUBA_MUL_CUTOFF = 109, /* Min. number of digits before Karatsuba multiplication is used. */ KARATSUBA_SQR_CUTOFF = 70, /* Min. number of digits before Karatsuba squaring is used. */
KARATSUBA_SQR_CUTOFF = 127, /* Min. number of digits before Karatsuba squaring is used. */
TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */ TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */
TOOM_SQR_CUTOFF = 400; TOOM_SQR_CUTOFF = 400;

66
pretty.build Normal file
View File

@ -0,0 +1,66 @@
#!/bin/perl -w
#
# Cute little builder for perl
# Total waste of development time...
#
# This will build all the object files and then the archive .a file
# requires GCC, GNU make and a sense of humour.
#
# For every *.c file in the current directory the matching .o is built
# with "make <name>.o" unless a readable .o already exists, while a
# progress line (percentage, spinner, time left, bar) is redrawn in place.
# A final plain "make" then builds the archive.
#
# Tom St Denis
use strict;

# glyphs for the progress spinner, indexed by (file number % 4)
my @spinner  = ("/", "-", "\\", "|");

# width of the progress bar in characters
my $barwidth = 30;

my $starttime = time;
my $rate      = 0;   # files handled per second; stays 0 until a second has elapsed

print "Scanning for source files...\n";
my @sources = glob "*.c";
my $count   = scalar @sources;
print "Source files to build: $count\nBuilding...\n";

my $i          = 0;  # number of source files visited so far
my $lines      = 0;  # lines of code in the files that were actually compiled
my $filesbuilt = 0;  # files that needed compiling

foreach my $filename (@sources) {
   printf("Building %3.2f%%, ", (++$i/$count)*100.0);
   printf("%s, ", $spinner[$i % 4]);

   # estimated time remaining, only once a rate is known
   if ($rate > 0) {
      my $tleft = ($count - $i) / $rate;
      my $tsec  = $tleft%60;
      my $tmin  = ($tleft/60)%60;
      my $thour = int($tleft/3600);   # hours are not wrapped
      printf("%2d:%02d:%02d left, ", $thour, $tmin, $tsec);
   }

   # progress bar: one '#' per started 1/30th of the work, padded with spaces
   my $cnt    = ($i/$count)*$barwidth;
   my $filled = 0;
   $filled++ while ($filled < $cnt);
   print "[", "#" x $filled, " " x ($barwidth - $filled), "]\r";

   # object file name: replace only the trailing .c suffix
   (my $tmp = $filename) =~ s/\.c$/.o/;

   if (open(my $probe, "<", $tmp)) {
      # object already exists, nothing to build
      close $probe;
   } else {
      !system("make $tmp > /dev/null 2>/dev/null") or die "\nERROR: Failed to make $tmp!!!\n";
      open(my $src, "<", $filename) or die "Couldn't open $filename for reading: $!";
      ++$lines while (<$src>);
      close $src or die "Error closing $filename after reading: $!";
      ++$filesbuilt;
   }

   # update timer
   my $delay = time - $starttime;
   if ($delay != 0) {
      $rate = $i/$delay;
   }
}

# finish building the library
printf("\nFinished building source (%d seconds, %3.2f files per second).\n", time - $starttime, $rate);
print "Compiled approximately $filesbuilt files and $lines lines of code.\n";
print "Doing final make (building archive...)\n";
!system("make > /dev/null 2>/dev/null") or die "\nERROR: Failed to perform last make command!!!\n";
print "done.\n";

Binary file not shown.

View File

@ -258,7 +258,7 @@ floating point is meant to be implemented in hardware the precision of the manti
a mantissa of much larger precision than hardware alone can efficiently support. This approach could be useful where a mantissa of much larger precision than hardware alone can efficiently support. This approach could be useful where
scientific applications must minimize the total output error over long calculations. scientific applications must minimize the total output error over long calculations.
Another use for large integers is within arithmetic on polynomials of large characteristic (i.e. $GF(p)[x]$ for large $p$). Yet another use for large integers is within arithmetic on polynomials of large characteristic (i.e. $GF(p)[x]$ for large $p$).
In fact the library discussed within this text has already been used to form a polynomial basis library\footnote{See \url{http://poly.libtomcrypt.org} for more details.}. In fact the library discussed within this text has already been used to form a polynomial basis library\footnote{See \url{http://poly.libtomcrypt.org} for more details.}.
\subsection{Benefits of Multiple Precision Arithmetic} \subsection{Benefits of Multiple Precision Arithmetic}
@ -316,7 +316,7 @@ the reader how the algorithms fit together as well as where to start on various
\section{Discussion and Notation} \section{Discussion and Notation}
\subsection{Notation} \subsection{Notation}
A multiple precision integer of $n$-digits shall be denoted as $x = (x_{n-1} ... x_1 x_0)_{ \beta }$ and represent A multiple precision integer of $n$-digits shall be denoted as $x = (x_{n-1}, \ldots, x_1, x_0)_{ \beta }$ and represent
the integer $x \equiv \sum_{i=0}^{n-1} x_i\beta^i$. The elements of the array $x$ are said to be the radix $\beta$ digits the integer $x \equiv \sum_{i=0}^{n-1} x_i\beta^i$. The elements of the array $x$ are said to be the radix $\beta$ digits
of the integer. For example, $x = (1,2,3)_{10}$ would represent the integer of the integer. For example, $x = (1,2,3)_{10}$ would represent the integer
$1\cdot 10^2 + 2\cdot10^1 + 3\cdot10^0 = 123$. $1\cdot 10^2 + 2\cdot10^1 + 3\cdot10^0 = 123$.
@ -339,12 +339,11 @@ algorithms will be used to establish the relevant theory which will subsequently
precision algorithm to solve the same problem. precision algorithm to solve the same problem.
\subsection{Precision Notation} \subsection{Precision Notation}
For the purposes of this text a single precision variable must be able to represent integers in the range The variable $\beta$ represents the radix of a single digit of a multiple precision integer and
$0 \le x < q \beta$ while a double precision variable must be able to represent integers in the range must be of the form $q^p$ for $q, p \in \Z^+$. A single precision variable must be able to represent integers in
$0 \le x < q \beta^2$. The variable $\beta$ represents the radix of a single digit of a multiple precision integer and the range $0 \le x < q \beta$ while a double precision variable must be able to represent integers in the range
must be of the form $q^p$ for $q, p \in \Z^+$. The extra radix-$q$ factor allows additions and subtractions to proceed $0 \le x < q \beta^2$. The extra radix-$q$ factor allows additions and subtractions to proceed without truncation of the
without truncation of the carry. Since all modern computers are binary, it is assumed that $q$ is two, for all intents carry. Since all modern computers are binary, it is assumed that $q$ is two.
and purposes.
\index{mp\_digit} \index{mp\_word} \index{mp\_digit} \index{mp\_word}
Within the source code that will be presented for each algorithm, the data type \textbf{mp\_digit} will represent Within the source code that will be presented for each algorithm, the data type \textbf{mp\_digit} will represent
@ -376,7 +375,7 @@ the $/$ division symbol is used the intention is to perform an integer division
$5/2 = 2$ which will often be written as $\lfloor 5/2 \rfloor = 2$ for clarity. When an expression is written as a $5/2 = 2$ which will often be written as $\lfloor 5/2 \rfloor = 2$ for clarity. When an expression is written as a
fraction a real value division is implied, for example ${5 \over 2} = 2.5$. fraction a real value division is implied, for example ${5 \over 2} = 2.5$.
The norm of a multiple precision integer, for example, $\vert \vert x \vert \vert$ will be used to represent the number of digits in the representation The norm of a multiple precision integer, for example $\vert \vert x \vert \vert$, will be used to represent the number of digits in the representation
of the integer. For example, $\vert \vert 123 \vert \vert = 3$ and $\vert \vert 79452 \vert \vert = 5$. of the integer. For example, $\vert \vert 123 \vert \vert = 3$ and $\vert \vert 79452 \vert \vert = 5$.
\subsection{Work Effort} \subsection{Work Effort}
@ -569,7 +568,7 @@ By building outwards from a base foundation instead of using a parallel design m
highly modular. Being highly modular is a desirable property of any project as it often means the resulting product highly modular. Being highly modular is a desirable property of any project as it often means the resulting product
has a small footprint and updates are easy to perform. has a small footprint and updates are easy to perform.
Usually when I start a project I will begin with the header file. I define the data types I think I will need and Usually when I start a project I will begin with the header files. I define the data types I think I will need and
prototype the initial functions that are not dependent on other functions (within the library). After I prototype the initial functions that are not dependent on other functions (within the library). After I
implement these base functions I prototype more dependent functions and implement them. The process repeats until implement these base functions I prototype more dependent functions and implement them. The process repeats until
I implement all of the functions I require. For example, in the case of LibTomMath I implemented functions such as I implement all of the functions I require. For example, in the case of LibTomMath I implemented functions such as
@ -619,14 +618,26 @@ any such data type but it does provide for making composite data types known as
used within LibTomMath. used within LibTomMath.
\index{mp\_int} \index{mp\_int}
\begin{verbatim} \begin{figure}[here]
typedef struct { \begin{center}
int used, alloc, sign; \begin{small}
mp_digit *dp; %\begin{verbatim}
} mp_int; \begin{tabular}{|l|}
\end{verbatim} \hline
typedef struct \{ \\
\hspace{3mm}int used, alloc, sign;\\
\hspace{3mm}mp\_digit *dp;\\
\} \textbf{mp\_int}; \\
\hline
\end{tabular}
%\end{verbatim}
\end{small}
\caption{The mp\_int Structure}
\label{fig:mpint}
\end{center}
\end{figure}
The mp\_int structure can be broken down as follows. The mp\_int structure (fig. \ref{fig:mpint}) can be broken down as follows.
\begin{enumerate} \begin{enumerate}
\item The \textbf{used} parameter denotes how many digits of the array \textbf{dp} contain the digits used to represent \item The \textbf{used} parameter denotes how many digits of the array \textbf{dp} contain the digits used to represent
@ -701,9 +712,10 @@ fault by dereferencing memory not owned by the application.
In the case of LibTomMath the only errors that are checked for are related to inappropriate inputs (division by zero for In the case of LibTomMath the only errors that are checked for are related to inappropriate inputs (division by zero for
instance) and memory allocation errors. It will not check that the mp\_int passed to any function is valid nor instance) and memory allocation errors. It will not check that the mp\_int passed to any function is valid nor
will it check pointers for validity. Any function that can cause a runtime error will return an error code as an will it check pointers for validity. Any function that can cause a runtime error will return an error code as an
\textbf{int} data type with one of the following values. \textbf{int} data type with one of the following values (fig. \ref{fig:errcodes}).
\index{MP\_OKAY} \index{MP\_VAL} \index{MP\_MEM} \index{MP\_OKAY} \index{MP\_VAL} \index{MP\_MEM}
\begin{figure}[here]
\begin{center} \begin{center}
\begin{tabular}{|l|l|} \begin{tabular}{|l|l|}
\hline \textbf{Value} & \textbf{Meaning} \\ \hline \textbf{Value} & \textbf{Meaning} \\
@ -713,6 +725,9 @@ will it check pointers for validity. Any function that can cause a runtime erro
\hline \hline
\end{tabular} \end{tabular}
\end{center} \end{center}
\caption{LibTomMath Error Codes}
\label{fig:errcodes}
\end{figure}
When an error is detected within a function it should free any memory it allocated, often during the initialization of When an error is detected within a function it should free any memory it allocated, often during the initialization of
temporary mp\_ints, and return as soon as possible. The goal is to leave the system in the same state it was when the temporary mp\_ints, and return as soon as possible. The goal is to leave the system in the same state it was when the
@ -748,6 +763,7 @@ to zero. The \textbf{used} count set to zero and \textbf{sign} set to \textbf{M
An mp\_int is said to be initialized if it is set to a valid, preferably default, state such that all of the members of the An mp\_int is said to be initialized if it is set to a valid, preferably default, state such that all of the members of the
structure are set to valid values. The mp\_init algorithm will perform such an action. structure are set to valid values. The mp\_init algorithm will perform such an action.
\index{mp\_init}
\begin{figure}[here] \begin{figure}[here]
\begin{center} \begin{center}
\begin{tabular}{l} \begin{tabular}{l}
@ -770,17 +786,23 @@ structure are set to valid values. The mp\_init algorithm will perform such an
\end{figure} \end{figure}
\textbf{Algorithm mp\_init.} \textbf{Algorithm mp\_init.}
The \textbf{MP\_PREC} name represents a constant\footnote{Defined in the ``tommath.h'' header file within LibTomMath.} The purpose of this function is to initialize an mp\_int structure so that the rest of the library can properly
used to dictate the minimum precision of allocated mp\_int integers. Ideally, it is at least equal to $32$ since for most manipulate it. It is assumed that the input may not have had any of its members previously initialized which is certainly
purposes that will be more than enough. a valid assumption if the input resides on the stack.
Memory for the default number of digits is allocated first. If the allocation fails the algorithm returns immediately Before any of the members such as \textbf{sign}, \textbf{used} or \textbf{alloc} are initialized the memory for
with the \textbf{MP\_MEM} error code. If the allocation succeeds the remaining members of the mp\_int structure the digits is allocated. If this fails the function returns before setting any of the other members. The \textbf{MP\_PREC}
must be initialized to reflect the default initial state. name represents a constant\footnote{Defined in the ``tommath.h'' header file within LibTomMath.}
used to dictate the minimum precision of newly initialized mp\_int integers. Ideally, it is at least equal to the smallest
precision number you'll be working with.
The allocated digits are all set to zero (step three) to ensure they are in a known state. The \textbf{sign}, \textbf{used} Allocating a block of digits at first instead of a single digit has the benefit of lowering the number of usually slow
and \textbf{alloc} are subsequently initialized to represent the zero integer. By step seven the algorithm returns a success heap operations later functions will have to perform in the future. If \textbf{MP\_PREC} is set correctly the slack
code and the mp\_int $a$ has been successfully initialized to a valid state representing the integer zero. memory and the number of heap operations will be trivial.
Once the allocation has been made the digits have to be set to zero as well as the \textbf{used}, \textbf{sign} and
\textbf{alloc} members initialized. This ensures that the mp\_int will always represent the default state of zero regardless
of the original condition of the input.
\textbf{Remark.} \textbf{Remark.}
This function introduces the idiosyncrasy that all iterative loops, commonly initiated with the ``for'' keyword, iterate incrementally This function introduces the idiosyncrasy that all iterative loops, commonly initiated with the ``for'' keyword, iterate incrementally
@ -796,19 +818,21 @@ One immediate observation of this initializtion function is that it does not ret
is assumed that the caller has already allocated memory for the mp\_int structure, typically on the application stack. The is assumed that the caller has already allocated memory for the mp\_int structure, typically on the application stack. The
call to mp\_init() is used only to initialize the members of the structure to a known default state. call to mp\_init() is used only to initialize the members of the structure to a known default state.
Before any of the other members of the structure are initialized memory from the application heap is allocated with Here we see (line @23,XMALLOC@) the memory allocation is performed first. This allows us to exit cleanly and quickly
the calloc() function (line @22,calloc@). The size of the allocated memory is large enough to hold \textbf{MP\_PREC} if there is an error. If the allocation fails the routine will return \textbf{MP\_MEM} to the caller to indicate there
mp\_digit variables. The calloc() function is used instead\footnote{calloc() will allocate memory in the same was a memory error. The function XMALLOC is what actually allocates the memory. Technically XMALLOC is not a function
manner as malloc() except that it also sets the contents to zero upon successfully allocating the memory.} of malloc() but a macro defined in ``tommath.h''. By default, XMALLOC will evaluate to malloc() which is the C library's built-in
since digits have to be set to zero for the function to finish correctly. The \textbf{OPT\_CAST} token is a macro memory allocation routine.
definition which will turn into a cast from void * to mp\_digit * for C++ compilers. It is not required for C compilers.
After the memory has been successfully allocated the remainder of the members are initialized In order to assure the mp\_int is in a known state the digits must be set to zero. On most platforms this could have been
accomplished by using calloc() instead of malloc(). However, to correctly initialize an integer type to a given value in a
portable fashion you have to actually assign the value. The for loop (line @28,for@) performs this required
operation.
After the memory has been successfully initialized the remainder of the members are initialized
(lines @29,used@ through @31,sign@) to their respective default states. At this point the algorithm has succeeded and (lines @29,used@ through @31,sign@) to their respective default states. At this point the algorithm has succeeded and
a success code is returned to the calling function. a success code is returned to the calling function. If this function returns \textbf{MP\_OKAY} it is safe to assume the
mp\_int structure has been properly initialized and is safe to use with other functions within the library.
If this function returns \textbf{MP\_OKAY} it is safe to assume the mp\_int structure has been properly initialized and
is safe to use with other functions within the library.
\subsection{Clearing an mp\_int} \subsection{Clearing an mp\_int}
When an mp\_int is no longer required by the application, the memory that has been allocated for its digits must be When an mp\_int is no longer required by the application, the memory that has been allocated for its digits must be
@ -819,7 +843,7 @@ returned to the application's memory pool with the mp\_clear algorithm.
\begin{tabular}{l} \begin{tabular}{l}
\hline Algorithm \textbf{mp\_clear}. \\ \hline Algorithm \textbf{mp\_clear}. \\
\textbf{Input}. An mp\_int $a$ \\ \textbf{Input}. An mp\_int $a$ \\
\textbf{Output}. The memory for $a$ is freed for reuse. \\ \textbf{Output}. The memory for $a$ shall be deallocated. \\
\hline \\ \hline \\
1. If $a$ has been previously freed then return(\textit{MP\_OKAY}). \\ 1. If $a$ has been previously freed then return(\textit{MP\_OKAY}). \\
2. for $n$ from 0 to $a.used - 1$ do \\ 2. for $n$ from 0 to $a.used - 1$ do \\
@ -836,32 +860,31 @@ returned to the application's memory pool with the mp\_clear algorithm.
\end{figure} \end{figure}
\textbf{Algorithm mp\_clear.} \textbf{Algorithm mp\_clear.}
This algorithm releases the memory allocated for an mp\_int back into the memory pool for reuse. It is designed This algorithm accomplishes two goals. First, it clears the digits and the other mp\_int members. This ensures that
such that a given mp\_int structure can be cleared multiple times between initializations without attempting to if a developer accidentally re-uses a cleared structure it is less likely to cause problems. The second goal
free the memory twice\footnote{In ISO C for example, calling free() twice on the same memory block causes undefined is to free the allocated memory.
behaviour.}.
The first step determines if the mp\_int structure has been marked as free already. If it has, the algorithm returns The logic behind the algorithm is extended by marking cleared mp\_int structures so that subsequent calls to this
success immediately as no further actions are required. Otherwise, the algorithm will proceed to put the structure algorithm will not try to free the memory multiple times. Cleared mp\_ints are detectable by having a pre-defined invalid
in a known empty and otherwise invalid state. First the digits of the mp\_int are set to zero. The memory that has been allocated for the digit pointer \textbf{dp} setting.
digits is then freed. The \textbf{used} and \textbf{alloc} counts are both set to zero and the \textbf{sign} set to
\textbf{MP\_ZPOS}. This known fixed state for cleared mp\_int structures will make debugging easier for the end
developer. That is, if they spot (via their debugger) an mp\_int they are using that is in this state it will be
obvious that they erroneously and prematurely cleared the mp\_int structure.
Note that once an mp\_int has been cleared the mp\_int structure is no longer in a valid state for any other algorithm Once an mp\_int has been cleared the mp\_int structure is no longer in a valid state for any other algorithm
with the exception of algorithms mp\_init, mp\_init\_copy, mp\_init\_size and mp\_clear. with the exception of algorithms mp\_init, mp\_init\_copy, mp\_init\_size and mp\_clear.
EXAM,bn_mp_clear.c EXAM,bn_mp_clear.c
The ``if'' statement (line @21,a->dp != NULL@) prevents the heap from being corrupted if a user double-frees an The algorithm only operates on the mp\_int if it hasn't been previously cleared. The if statement (line @23,a->dp != NULL@)
mp\_int. This is because once the memory is freed the pointer is set to \textbf{NULL} (line @30,NULL@). checks to see if the \textbf{dp} member is not \textbf{NULL}. If the mp\_int is a valid mp\_int then \textbf{dp} cannot be
\textbf{NULL} in which case the if statement will evaluate to true.
Without the check, code that accidentally calls mp\_clear twice for a given mp\_int structure would try to free the memory The digits of the mp\_int are cleared by the for loop (line @25,for@) which assigns a zero to every digit. Similar to mp\_init()
allocated for the digits twice. This may cause some C libraries to signal a fault. By setting the pointer to the digits are assigned zero instead of using block memory operations (such as memset()) since this is more portable.
\textbf{NULL} it helps debug code that may inadvertently free the mp\_int before it is truly not needed, because attempts
to reference digits should fail immediately. The allocated digits are set to zero before being freed (line @24,memset@). The digits are deallocated off the heap via the XFREE macro. Similar to XMALLOC the XFREE macro actually evaluates to
This is ideal for cryptographic situations where the integer that the mp\_int represents might need to be kept a secret. a standard C library function. In this case the free() function. Since free() only deallocates the memory the pointer
still has to be reset to \textbf{NULL} manually (line @33,NULL@).
Now that the digits have been cleared and deallocated the other members are set to their final values (lines @34,= 0@ and @35,ZPOS@).
\section{Maintenance Algorithms} \section{Maintenance Algorithms}
@ -889,7 +912,7 @@ must be re-sized appropriately to accomodate the result. The mp\_grow algorithm
1. if $a.alloc \ge b$ then return(\textit{MP\_OKAY}) \\ 1. if $a.alloc \ge b$ then return(\textit{MP\_OKAY}) \\
2. $u \leftarrow b\mbox{ (mod }MP\_PREC\mbox{)}$ \\ 2. $u \leftarrow b\mbox{ (mod }MP\_PREC\mbox{)}$ \\
3. $v \leftarrow b + 2 \cdot MP\_PREC - u$ \\ 3. $v \leftarrow b + 2 \cdot MP\_PREC - u$ \\
4. Re-Allocate the array of digits $a$ to size $v$ \\ 4. Re-allocate the array of digits $a$ to size $v$ \\
5. If the allocation failed then return(\textit{MP\_MEM}). \\ 5. If the allocation failed then return(\textit{MP\_MEM}). \\
6. for n from a.alloc to $v - 1$ do \\ 6. for n from a.alloc to $v - 1$ do \\
\hspace{+3mm}6.1 $a_n \leftarrow 0$ \\ \hspace{+3mm}6.1 $a_n \leftarrow 0$ \\
@ -914,15 +937,19 @@ assumed to contain undefined values they are initially set to zero.
EXAM,bn_mp_grow.c EXAM,bn_mp_grow.c
The first step is to see if we actually need to perform a re-allocation at all (line @24,a->alloc < size@). If a reallocation A quick optimization is to first determine if a memory re-allocation is required at all. The if statement (line @23,if@) checks
must occur the digit count is padded upwards to help prevent many trivial reallocations (line @28,size@). Next the reallocation is performed if the \textbf{alloc} member of the mp\_int is smaller than the requested digit count. If the count is not larger than \textbf{alloc}
and the return of realloc() is stored in a temporary pointer named $tmp$ (line @36,realloc@). The return is stored in a temporary the function skips the re-allocation part thus saving time.
instead of $a.dp$ to prevent the code from losing the original pointer in case the reallocation fails. Had the return been stored
in $a.dp$ instead there would be no way to reclaim the heap originally used.
If the reallocation fails the function will return \textbf{MP\_MEM} (line @39,return@), otherwise, the value of $tmp$ is assigned When a re-allocation is performed it is turned into an optimal request to save time in the future. The requested digit count is
to the pointer $a.dp$ and the function continues. A simple for loop from line @48,a->alloc@ to line @50,}@ will zero all digits padded upwards to the 2nd multiple of \textbf{MP\_PREC} larger than \textbf{alloc} (line @25, size@). The XREALLOC function is used
that were above the old \textbf{alloc} limit to make sure the integer is in a known state. to re-allocate the memory. As per the other functions XREALLOC is actually a macro which evaluates to realloc by default. The realloc
function leaves the base of the allocation intact which means the first \textbf{alloc} digits of the mp\_int are the same as before
the re-allocation. All that is left is to clear the newly allocated digits and return.
Note that the re-allocation result is actually stored in a temporary pointer $tmp$. This is to allow this function to return
an error with a valid pointer. Earlier releases of the library stored the result of XREALLOC into the mp\_int $a$. That would
result in a memory leak if XREALLOC ever failed.
\subsection{Initializing Variable Precision mp\_ints} \subsection{Initializing Variable Precision mp\_ints}
Occasionally the number of digits required will be known in advance of an initialization, based on, for example, the size Occasionally the number of digits required will be known in advance of an initialization, based on, for example, the size
@ -970,7 +997,7 @@ The number of digits $b$ requested is padded (line @22,MP_PREC@) by first augmen
mp\_int is placed in a default state representing the integer zero. Otherwise, the error code \textbf{MP\_MEM} will be mp\_int is placed in a default state representing the integer zero. Otherwise, the error code \textbf{MP\_MEM} will be
returned (line @27,return@). returned (line @27,return@).
The digits are allocated and set to zero at the same time with the calloc() function (line @25,calloc@). The The digits are allocated and set to zero at the same time with the calloc() function (line @25,XCALLOC@). The
\textbf{used} count is set to zero, the \textbf{alloc} count set to the padded digit count and the \textbf{sign} flag set \textbf{used} count is set to zero, the \textbf{alloc} count set to the padded digit count and the \textbf{sign} flag set
to \textbf{MP\_ZPOS} to achieve a default valid mp\_int state (lines @29,used@, @30,alloc@ and @31,sign@). If the function to \textbf{MP\_ZPOS} to achieve a default valid mp\_int state (lines @29,used@, @30,alloc@ and @31,sign@). If the function
returns successfully then it is correct to assume that the mp\_int structure is in a valid state for the remainder of the returns successfully then it is correct to assume that the mp\_int structure is in a valid state for the remainder of the

File diff suppressed because it is too large Load Diff