added libtommath-0.31

This commit is contained in:
Tom St Denis 2004-08-09 22:15:59 +00:00 committed by Steffen Jaeckel
parent 350578d400
commit 8eaa98807b
75 changed files with 5111 additions and 5218 deletions

BIN
bn.pdf

Binary file not shown.

2
bn.tex
View File

@ -49,7 +49,7 @@
\begin{document}
\frontmatter
\pagestyle{empty}
\title{LibTomMath User Manual \\ v0.30}
\title{LibTomMath User Manual \\ v0.31}
\author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle
This text, the library and the accompanying textbook are all hereby placed in the public domain. This book has been

View File

@ -88,7 +88,7 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
}
/* setup dest */
olduse = c->used;
olduse = c->used;
c->used = digs;
{

View File

@ -36,7 +36,7 @@ mp_2expt (mp_int * a, int b)
a->used = b / DIGIT_BIT + 1;
/* put the single bit in its place */
a->dp[b / DIGIT_BIT] = 1 << (b % DIGIT_BIT);
a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
return MP_OKAY;
}

View File

@ -18,10 +18,14 @@
void
mp_clear (mp_int * a)
{
int i;
/* only do anything if a hasn't been freed previously */
if (a->dp != NULL) {
/* first zero the digits */
memset (a->dp, 0, sizeof (mp_digit) * a->used);
for (i = 0; i < a->used; i++) {
a->dp[i] = 0;
}
/* free ram */
XFREE(a->dp);

View File

@ -187,7 +187,7 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
*/
/* get sign before writing to c */
x.sign = a->sign;
x.sign = x.used == 0 ? MP_ZPOS : a->sign;
if (c != NULL) {
mp_clamp (&q);

View File

@ -14,15 +14,22 @@
*/
#include <tommath.h>
/* init a new bigint */
/* init a new mp_int */
int mp_init (mp_int * a)
{
int i;
/* allocate memory required and clear it */
a->dp = OPT_CAST(mp_digit) XCALLOC (sizeof (mp_digit), MP_PREC);
a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
if (a->dp == NULL) {
return MP_MEM;
}
/* set the digits to zero */
for (i = 0; i < MP_PREC; i++) {
a->dp[i] = 0;
}
/* set the used to zero, allocated digits to the default precision
* and sign to positive */
a->used = 0;

View File

@ -76,9 +76,6 @@ int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
goto X0Y0;
/* now shift the digits */
x0.sign = x1.sign = a->sign;
y0.sign = y1.sign = b->sign;
x0.used = y0.used = B;
x1.used = a->used - B;
y1.used = b->used - B;

View File

@ -43,6 +43,6 @@ int mp_mul (mp_int * a, mp_int * b, mp_int * c)
res = s_mp_mul (a, b, c);
}
}
c->sign = neg;
c->sign = (c->used > 0) ? neg : MP_ZPOS;
return res;
}

View File

@ -17,7 +17,8 @@
/* determines if mp_reduce_2k can be used */
int mp_reduce_is_2k(mp_int *a)
{
int ix, iy, iz, iw;
int ix, iy, iw;
mp_digit iz;
if (a->used == 0) {
return 0;
@ -34,7 +35,7 @@ int mp_reduce_is_2k(mp_int *a)
return 0;
}
iz <<= 1;
if (iz > (int)MP_MASK) {
if (iz > (mp_digit)MP_MASK) {
++iw;
iz = 1;
}

View File

@ -18,14 +18,16 @@
CPU /Compiler /MUL CUTOFF/SQR CUTOFF
-------------------------------------------------------------
Intel P4 /GCC v3.2 / 70/ 108
AMD Athlon XP /GCC v3.2 / 109/ 127
Intel P4 Northwood /GCC v3.3.3 / 59/ 81/profiled build
Intel P4 Northwood /GCC v3.3.3 / 59/ 80/profiled_single build
Intel P4 Northwood /ICC v8.0 / 57/ 70/profiled build
Intel P4 Northwood /ICC v8.0 / 54/ 76/profiled_single build
AMD Athlon XP /GCC v3.2 / 109/ 127/
*/
/* configured for a AMD XP Thoroughbred core with etc/tune.c */
int KARATSUBA_MUL_CUTOFF = 109, /* Min. number of digits before Karatsuba multiplication is used. */
KARATSUBA_SQR_CUTOFF = 127, /* Min. number of digits before Karatsuba squaring is used. */
int KARATSUBA_MUL_CUTOFF = 57, /* Min. number of digits before Karatsuba multiplication is used. */
KARATSUBA_SQR_CUTOFF = 70, /* Min. number of digits before Karatsuba squaring is used. */
TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */
TOOM_SQR_CUTOFF = 400;

View File

@ -84,6 +84,7 @@ while (<IN>) {
$text[$line++] = $_;
last if ($_ =~ /tommath\.h/);
}
<SRC>;
}
$inline = 0;

View File

@ -1,3 +1,12 @@
August 9th, 2004
v0.31 -- "profiled" builds now :-) new timings for Intel Northwoods
-- Added "pretty" build target
-- Updated mp_init() to actually assign 0's instead of relying on calloc()
-- "Wolfgang Ehrhardt" <Wolfgang.Ehrhardt@munich.netsurf.de> found a bug in mp_mul() where if
you multiply a negative by zero you get negative zero as the result. Oops.
-- J Harper from PeerSec let me toy with his AMD64 and I got 60-bit digits working properly
[this also means that I fixed a bug where if sizeof(int) < sizeof(mp_digit) it would bug]
April 11th, 2004
v0.30 -- Added "mp_toradix_n" which stores up to "n-1" least significant digits of an mp_int
-- Johan Lindh sent a patch so MSVC wouldn't whine about redefining malloc [in weird dll modes]

View File

@ -1,7 +1,5 @@
#include <time.h>
#define TESTING
#ifdef IOWNANATHLON
#include <unistd.h>
#define SLEEP sleep(4)
@ -11,49 +9,6 @@
#include "tommath.h"
#ifdef TIMER
ulong64 _tt;
#if defined(__i386__) || defined(_M_IX86) || defined(_M_AMD64)
/* RDTSC from Scott Duplichan */
static ulong64 TIMFUNC (void)
{
#if defined __GNUC__
#ifdef __i386__
ulong64 a;
__asm__ __volatile__ ("rdtsc ":"=A" (a));
return a;
#else /* gcc-IA64 version */
unsigned long result;
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
while (__builtin_expect ((int) result == -1, 0))
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
return result;
#endif
// Microsoft and Intel Windows compilers
#elif defined _M_IX86
__asm rdtsc
#elif defined _M_AMD64
return __rdtsc ();
#elif defined _M_IA64
#if defined __INTEL_COMPILER
#include <ia64intrin.h>
#endif
return __getReg (3116);
#else
#error need rdtsc function for this build
#endif
}
#else
#define TIMFUNC clock
#endif
ulong64 rdtsc(void) { return TIMFUNC() - _tt; }
void reset(void) { _tt = TIMFUNC(); }
#endif
void ndraw(mp_int *a, char *name)
{
char buf[4096];
@ -89,10 +44,6 @@ int myrng(unsigned char *dst, int len, void *dat)
}
#define DO2(x) x; x;
#define DO4(x) DO2(x); DO2(x);
#define DO8(x) DO4(x); DO4(x);
#define DO(x) DO8(x); DO8(x);
char cmd[4096], buf[4096];
int main(void)
@ -103,10 +54,6 @@ int main(void)
unsigned rr;
int i, n, err, cnt, ix, old_kara_m, old_kara_s;
#ifdef TIMER
ulong64 tt, CLK_PER_SEC;
FILE *log, *logb, *logc;
#endif
mp_init(&a);
mp_init(&b);
@ -117,11 +64,10 @@ int main(void)
srand(time(NULL));
#ifdef TESTING
// test mp_get_int
printf("Testing: mp_get_int\n");
for(i=0;i<1000;++i) {
t = (unsigned long)rand()*rand()+1;
t = ((unsigned long)rand()*rand()+1)&0xFFFFFFFF;
mp_set_int(&a,t);
if (t!=mp_get_int(&a)) {
printf("mp_get_int() bad result!\n");
@ -141,7 +87,7 @@ int main(void)
// test mp_sqrt
printf("Testing: mp_sqrt\n");
for (i=0;i<10000;++i) {
for (i=0;i<1000;++i) {
printf("%6d\r", i); fflush(stdout);
n = (rand()&15)+1;
mp_rand(&a,n);
@ -157,7 +103,7 @@ int main(void)
}
printf("\nTesting: mp_is_square\n");
for (i=0;i<100000;++i) {
for (i=0;i<1000;++i) {
printf("%6d\r", i); fflush(stdout);
/* test mp_is_square false negatives */
@ -186,11 +132,9 @@ int main(void)
}
printf("\n\n");
#endif
#ifdef TESTING
/* test for size */
for (ix = 16; ix < 512; ix++) {
for (ix = 10; ix < 256; ix++) {
printf("Testing (not safe-prime): %9d bits \r", ix); fflush(stdout);
err = mp_prime_random_ex(&a, 8, ix, (rand()&1)?LTM_PRIME_2MSB_OFF:LTM_PRIME_2MSB_ON, myrng, NULL);
if (err != MP_OKAY) {
@ -203,7 +147,7 @@ int main(void)
}
}
for (ix = 16; ix < 512; ix++) {
for (ix = 16; ix < 256; ix++) {
printf("Testing ( safe-prime): %9d bits \r", ix); fflush(stdout);
err = mp_prime_random_ex(&a, 8, ix, ((rand()&1)?LTM_PRIME_2MSB_OFF:LTM_PRIME_2MSB_ON)|LTM_PRIME_SAFE, myrng, NULL);
if (err != MP_OKAY) {
@ -225,9 +169,7 @@ int main(void)
}
printf("\n\n");
#endif
#ifdef TESTING
mp_read_radix(&a, "123456", 10);
mp_toradix_n(&a, buf, 10, 3);
printf("a == %s\n", buf);
@ -235,7 +177,6 @@ int main(void)
printf("a == %s\n", buf);
mp_toradix_n(&a, buf, 10, 30);
printf("a == %s\n", buf);
#endif
#if 0
@ -248,22 +189,6 @@ int main(void)
}
#endif
#if 0
{
mp_word aa, bb;
for (;;) {
aa = abs(rand()) & MP_MASK;
bb = abs(rand()) & MP_MASK;
if (MULT(aa,bb) != (aa*bb)) {
printf("%llu * %llu == %llu or %llu?\n", aa, bb, (ulong64)MULT(aa,bb), (ulong64)(aa*bb));
return 0;
}
}
}
#endif
#ifdef TESTING
/* test mp_cnt_lsb */
printf("testing mp_cnt_lsb...\n");
mp_set(&a, 1);
@ -274,12 +199,10 @@ int main(void)
}
mp_mul_2(&a, &a);
}
#endif
/* test mp_reduce_2k */
#ifdef TESTING
printf("Testing mp_reduce_2k...\n");
for (cnt = 3; cnt <= 384; ++cnt) {
for (cnt = 3; cnt <= 128; ++cnt) {
mp_digit tmp;
mp_2expt(&a, cnt);
mp_sub_d(&a, 2, &a); /* a = 2**cnt - 2 */
@ -289,7 +212,7 @@ int main(void)
printf("(%d)", mp_reduce_is_2k(&a));
mp_reduce_2k_setup(&a, &tmp);
printf("(%d)", tmp);
for (ix = 0; ix < 10000; ix++) {
for (ix = 0; ix < 1000; ix++) {
if (!(ix & 127)) {printf("."); fflush(stdout); }
mp_rand(&b, (cnt/DIGIT_BIT + 1) * 2);
mp_copy(&c, &b);
@ -301,14 +224,11 @@ int main(void)
}
}
}
#endif
/* test mp_div_3 */
#ifdef TESTING
printf("Testing mp_div_3...\n");
mp_set(&d, 3);
for (cnt = 0; cnt < 1000000; ) {
for (cnt = 0; cnt < 10000; ) {
mp_digit r1, r2;
if (!(++cnt & 127)) printf("%9d\r", cnt);
@ -321,12 +241,10 @@ int main(void)
}
}
printf("\n\nPassed div_3 testing\n");
#endif
/* test the DR reduction */
#ifdef TESTING
printf("testing mp_dr_reduce...\n");
for (cnt = 2; cnt < 128; cnt++) {
for (cnt = 2; cnt < 32; cnt++) {
printf("%d digit modulus\n", cnt);
mp_grow(&a, cnt);
mp_zero(&a);
@ -334,7 +252,7 @@ int main(void)
a.dp[ix] = MP_MASK;
}
a.used = cnt;
mp_prime_next_prime(&a, 3, 0);
a.dp[0] = 3;
mp_rand(&b, cnt - 1);
mp_copy(&b, &c);
@ -346,206 +264,16 @@ int main(void)
mp_copy(&b, &c);
mp_mod(&b, &a, &b);
mp_dr_reduce(&c, &a, (1<<DIGIT_BIT)-a.dp[0]);
mp_dr_reduce(&c, &a, (((mp_digit)1)<<DIGIT_BIT)-a.dp[0]);
if (mp_cmp(&b, &c) != MP_EQ) {
printf("Failed on trial %lu\n", rr); exit(-1);
}
} while (++rr < 100000);
} while (++rr < 500);
printf("Passed DR test for %d digits\n", cnt);
}
#endif
#ifdef TIMER
/* temp. turn off TOOM */
TOOM_MUL_CUTOFF = TOOM_SQR_CUTOFF = 100000;
reset();
sleep(1);
CLK_PER_SEC = rdtsc();
printf("CLK_PER_SEC == %lu\n", CLK_PER_SEC);
log = fopen("logs/add.log", "w");
for (cnt = 8; cnt <= 128; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
reset();
rr = 0;
do {
DO(mp_add(&a,&b,&c));
rr += 16;
} while (rdtsc() < (CLK_PER_SEC * 2));
tt = rdtsc();
printf("Adding\t\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log);
}
fclose(log);
log = fopen("logs/sub.log", "w");
for (cnt = 8; cnt <= 128; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
reset();
rr = 0;
do {
DO(mp_sub(&a,&b,&c));
rr += 16;
} while (rdtsc() < (CLK_PER_SEC * 2));
tt = rdtsc();
printf("Subtracting\t\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log);
}
fclose(log);
/* do mult/square twice, first without karatsuba and second with */
mult_test:
old_kara_m = KARATSUBA_MUL_CUTOFF;
old_kara_s = KARATSUBA_SQR_CUTOFF;
for (ix = 0; ix < 2; ix++) {
printf("With%s Karatsuba\n", (ix==0)?"out":"");
KARATSUBA_MUL_CUTOFF = (ix==0)?9999:old_kara_m;
KARATSUBA_SQR_CUTOFF = (ix==0)?9999:old_kara_s;
log = fopen((ix==0)?"logs/mult.log":"logs/mult_kara.log", "w");
for (cnt = 32; cnt <= 288; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
reset();
rr = 0;
do {
DO(mp_mul(&a, &b, &c));
rr += 16;
} while (rdtsc() < (CLK_PER_SEC * 2));
tt = rdtsc();
printf("Multiplying\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
fprintf(log, "%d %9llu\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log);
}
fclose(log);
log = fopen((ix==0)?"logs/sqr.log":"logs/sqr_kara.log", "w");
for (cnt = 32; cnt <= 288; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
reset();
rr = 0;
do {
DO(mp_sqr(&a, &b));
rr += 16;
} while (rdtsc() < (CLK_PER_SEC * 2));
tt = rdtsc();
printf("Squaring\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
fprintf(log, "%d %9llu\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log);
}
fclose(log);
}
expt_test:
{
char *primes[] = {
/* 2K moduli mersenne primes */
"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151",
"531137992816767098689588206552468627329593117727031923199444138200403559860852242739162502265229285668889329486246501015346579337652707239409519978766587351943831270835393219031728127",
"10407932194664399081925240327364085538615262247266704805319112350403608059673360298012239441732324184842421613954281007791383566248323464908139906605677320762924129509389220345773183349661583550472959420547689811211693677147548478866962501384438260291732348885311160828538416585028255604666224831890918801847068222203140521026698435488732958028878050869736186900714720710555703168729087",
"1475979915214180235084898622737381736312066145333169775147771216478570297878078949377407337049389289382748507531496480477281264838760259191814463365330269540496961201113430156902396093989090226259326935025281409614983499388222831448598601834318536230923772641390209490231836446899608210795482963763094236630945410832793769905399982457186322944729636418890623372171723742105636440368218459649632948538696905872650486914434637457507280441823676813517852099348660847172579408422316678097670224011990280170474894487426924742108823536808485072502240519452587542875349976558572670229633962575212637477897785501552646522609988869914013540483809865681250419497686697771007",
"259117086013202627776246767922441530941818887553125427303974923161874019266586362086201209516800483406550695241733194177441689509238807017410377709597512042313066624082916353517952311186154862265604547691127595848775610568757931191017711408826252153849035830401185072116424747461823031471398340229288074545677907941037288235820705892351068433882986888616658650280927692080339605869308790500409503709875902119018371991620994002568935113136548829739112656797303241986517250116412703509705427773477972349821676443446668383119322540099648994051790241624056519054483690809616061625743042361721863339415852426431208737266591962061753535748892894599629195183082621860853400937932839420261866586142503251450773096274235376822938649407127700846077124211823080804139298087057504713825264571448379371125032081826126566649084251699453951887789613650248405739378594599444335231188280123660406262468609212150349937584782292237144339628858485938215738821232393687046160677362909315071",
"190797007524439073807468042969529173669356994749940177394741882673528979787005053706368049835514900244303495954950709725762186311224148828811920216904542206960744666169364221195289538436845390250168663932838805192055137154390912666527533007309292687539092257043362517857366624699975402375462954490293259233303137330643531556539739921926201438606439020075174723029056838272505051571967594608350063404495977660656269020823960825567012344189908927956646011998057988548630107637380993519826582389781888135705408653045219655801758081251164080554609057468028203308718724654081055323215860189611391296030471108443146745671967766308925858547271507311563765171008318248647110097614890313562856541784154881743146033909602737947385055355960331855614540900081456378659068370317267696980001187750995491090350108417050917991562167972281070161305972518044872048331306383715094854938415738549894606070722584737978176686422134354526989443028353644037187375385397838259511833166416134323695660367676897722287918773420968982326089026150031515424165462111337527431154890666327374921446276833564519776797633875503548665093914556482031482248883127023777039667707976559857333357013727342079099064400455741830654320379350833236245819348824064783585692924881021978332974949906122664421376034687815350484991",
/* DR moduli */
"14059105607947488696282932836518693308967803494693489478439861164411992439598399594747002144074658928593502845729752797260025831423419686528151609940203368612079",
"101745825697019260773923519755878567461315282017759829107608914364075275235254395622580447400994175578963163918967182013639660669771108475957692810857098847138903161308502419410142185759152435680068435915159402496058513611411688900243039",
"736335108039604595805923406147184530889923370574768772191969612422073040099331944991573923112581267542507986451953227192970402893063850485730703075899286013451337291468249027691733891486704001513279827771740183629161065194874727962517148100775228363421083691764065477590823919364012917984605619526140821797602431",
"38564998830736521417281865696453025806593491967131023221754800625044118265468851210705360385717536794615180260494208076605798671660719333199513807806252394423283413430106003596332513246682903994829528690198205120921557533726473585751382193953592127439965050261476810842071573684505878854588706623484573925925903505747545471088867712185004135201289273405614415899438276535626346098904241020877974002916168099951885406379295536200413493190419727789712076165162175783",
"542189391331696172661670440619180536749994166415993334151601745392193484590296600979602378676624808129613777993466242203025054573692562689251250471628358318743978285860720148446448885701001277560572526947619392551574490839286458454994488665744991822837769918095117129546414124448777033941223565831420390846864429504774477949153794689948747680362212954278693335653935890352619041936727463717926744868338358149568368643403037768649616778526013610493696186055899318268339432671541328195724261329606699831016666359440874843103020666106568222401047720269951530296879490444224546654729111504346660859907296364097126834834235287147",
"1487259134814709264092032648525971038895865645148901180585340454985524155135260217788758027400478312256339496385275012465661575576202252063145698732079880294664220579764848767704076761853197216563262660046602703973050798218246170835962005598561669706844469447435461092542265792444947706769615695252256130901271870341005768912974433684521436211263358097522726462083917939091760026658925757076733484173202927141441492573799914240222628795405623953109131594523623353044898339481494120112723445689647986475279242446083151413667587008191682564376412347964146113898565886683139407005941383669325997475076910488086663256335689181157957571445067490187939553165903773554290260531009121879044170766615232300936675369451260747671432073394867530820527479172464106442450727640226503746586340279816318821395210726268291535648506190714616083163403189943334431056876038286530365757187367147446004855912033137386225053275419626102417236133948503",
"1095121115716677802856811290392395128588168592409109494900178008967955253005183831872715423151551999734857184538199864469605657805519106717529655044054833197687459782636297255219742994736751541815269727940751860670268774903340296040006114013971309257028332849679096824800250742691718610670812374272414086863715763724622797509437062518082383056050144624962776302147890521249477060215148275163688301275847155316042279405557632639366066847442861422164832655874655824221577849928863023018366835675399949740429332468186340518172487073360822220449055340582568461568645259954873303616953776393853174845132081121976327462740354930744487429617202585015510744298530101547706821590188733515880733527449780963163909830077616357506845523215289297624086914545378511082534229620116563260168494523906566709418166011112754529766183554579321224940951177394088465596712620076240067370589036924024728375076210477267488679008016579588696191194060127319035195370137160936882402244399699172017835144537488486396906144217720028992863941288217185353914991583400421682751000603596655790990815525126154394344641336397793791497068253936771017031980867706707490224041075826337383538651825493679503771934836094655802776331664261631740148281763487765852746577808019633679",
/* generic unrestricted moduli */
"17933601194860113372237070562165128350027320072176844226673287945873370751245439587792371960615073855669274087805055507977323024886880985062002853331424203",
"2893527720709661239493896562339544088620375736490408468011883030469939904368086092336458298221245707898933583190713188177399401852627749210994595974791782790253946539043962213027074922559572312141181787434278708783207966459019479487",
"347743159439876626079252796797422223177535447388206607607181663903045907591201940478223621722118173270898487582987137708656414344685816179420855160986340457973820182883508387588163122354089264395604796675278966117567294812714812796820596564876450716066283126720010859041484786529056457896367683122960411136319",
"47266428956356393164697365098120418976400602706072312735924071745438532218237979333351774907308168340693326687317443721193266215155735814510792148768576498491199122744351399489453533553203833318691678263241941706256996197460424029012419012634671862283532342656309677173602509498417976091509154360039893165037637034737020327399910409885798185771003505320583967737293415979917317338985837385734747478364242020380416892056650841470869294527543597349250299539682430605173321029026555546832473048600327036845781970289288898317888427517364945316709081173840186150794397479045034008257793436817683392375274635794835245695887",
"436463808505957768574894870394349739623346440601945961161254440072143298152040105676491048248110146278752857839930515766167441407021501229924721335644557342265864606569000117714935185566842453630868849121480179691838399545644365571106757731317371758557990781880691336695584799313313687287468894148823761785582982549586183756806449017542622267874275103877481475534991201849912222670102069951687572917937634467778042874315463238062009202992087620963771759666448266532858079402669920025224220613419441069718482837399612644978839925207109870840278194042158748845445131729137117098529028886770063736487420613144045836803985635654192482395882603511950547826439092832800532152534003936926017612446606135655146445620623395788978726744728503058670046885876251527122350275750995227",
"11424167473351836398078306042624362277956429440521137061889702611766348760692206243140413411077394583180726863277012016602279290144126785129569474909173584789822341986742719230331946072730319555984484911716797058875905400999504305877245849119687509023232790273637466821052576859232452982061831009770786031785669030271542286603956118755585683996118896215213488875253101894663403069677745948305893849505434201763745232895780711972432011344857521691017896316861403206449421332243658855453435784006517202894181640562433575390821384210960117518650374602256601091379644034244332285065935413233557998331562749140202965844219336298970011513882564935538704289446968322281451907487362046511461221329799897350993370560697505809686438782036235372137015731304779072430260986460269894522159103008260495503005267165927542949439526272736586626709581721032189532726389643625590680105784844246152702670169304203783072275089194754889511973916207",
"1214855636816562637502584060163403830270705000634713483015101384881871978446801224798536155406895823305035467591632531067547890948695117172076954220727075688048751022421198712032848890056357845974246560748347918630050853933697792254955890439720297560693579400297062396904306270145886830719309296352765295712183040773146419022875165382778007040109957609739589875590885701126197906063620133954893216612678838507540777138437797705602453719559017633986486649523611975865005712371194067612263330335590526176087004421363598470302731349138773205901447704682181517904064735636518462452242791676541725292378925568296858010151852326316777511935037531017413910506921922450666933202278489024521263798482237150056835746454842662048692127173834433089016107854491097456725016327709663199738238442164843147132789153725513257167915555162094970853584447993125488607696008169807374736711297007473812256272245489405898470297178738029484459690836250560495461579533254473316340608217876781986188705928270735695752830825527963838355419762516246028680280988020401914551825487349990306976304093109384451438813251211051597392127491464898797406789175453067960072008590614886532333015881171367104445044718144312416815712216611576221546455968770801413440778423979",
NULL
};
log = fopen("logs/expt.log", "w");
logb = fopen("logs/expt_dr.log", "w");
logc = fopen("logs/expt_2k.log", "w");
for (n = 0; primes[n]; n++) {
SLEEP;
mp_read_radix(&a, primes[n], 10);
mp_zero(&b);
for (rr = 0; rr < mp_count_bits(&a); rr++) {
mp_mul_2(&b, &b);
b.dp[0] |= lbit();
b.used += 1;
}
mp_sub_d(&a, 1, &c);
mp_mod(&b, &c, &b);
mp_set(&c, 3);
reset();
rr = 0;
do {
DO(mp_exptmod(&c, &b, &a, &d));
rr += 16;
} while (rdtsc() < (CLK_PER_SEC * 2));
tt = rdtsc();
mp_sub_d(&a, 1, &e);
mp_sub(&e, &b, &b);
mp_exptmod(&c, &b, &a, &e); /* c^(p-1-b) mod a */
mp_mulmod(&e, &d, &a, &d); /* c^b * c^(p-1-b) == c^p-1 == 1 */
if (mp_cmp_d(&d, 1)) {
printf("Different (%d)!!!\n", mp_count_bits(&a));
draw(&d);
exit(0);
}
printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
fprintf((n < 6) ? logc : (n < 13) ? logb : log, "%d %9llu\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt);
}
}
fclose(log);
fclose(logb);
fclose(logc);
log = fopen("logs/invmod.log", "w");
for (cnt = 4; cnt <= 128; cnt += 4) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
do {
mp_add_d(&b, 1, &b);
mp_gcd(&a, &b, &c);
} while (mp_cmp_d(&c, 1) != MP_EQ);
reset();
rr = 0;
do {
DO(mp_invmod(&b, &a, &c));
rr += 16;
} while (rdtsc() < (CLK_PER_SEC * 2));
tt = rdtsc();
mp_mulmod(&b, &c, &a, &d);
if (mp_cmp_d(&d, 1) != MP_EQ) {
printf("Failed to invert\n");
return 0;
}
printf("Inverting mod\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((ulong64)rr)*CLK_PER_SEC)/tt);
}
fclose(log);
return 0;
#endif
div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n =
sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = add_d_n = sub_d_n= 0;

291
demo/timing.c Normal file
View File

@ -0,0 +1,291 @@
#include <tommath.h>
#include <time.h>
ulong64 _tt;
#ifdef IOWNANATHLON
#include <unistd.h>
#define SLEEP sleep(4)
#else
#define SLEEP
#endif
/* Print an mp_int to stdout as "<name>: <value in radix 64>" plus a newline.
 *
 * a    - number to print
 * name - label written in front of the value
 *
 * The status returned by mp_toradix() is not inspected, so the buffer is
 * terminated before the call: if the conversion writes nothing, an empty
 * value is printed instead of indeterminate stack bytes.
 * NOTE(review): mp_toradix() is given no length limit, so the value is
 * assumed to fit in 4096 characters -- confirm against the largest operand.
 */
void ndraw(mp_int *a, char *name)
{
   char buf[4096];

   buf[0] = '\0';
   printf("%s: ", name);
   mp_toradix(a, buf, 64);
   printf("%s\n", buf);
}
/* Print an mp_int through ndraw() with an empty label. */
static void draw(mp_int *a)
{
   char *no_label = "";

   ndraw(a, no_label);
}
/* State of the shift register that lbit() steps; only the low 32 bits are used. */
unsigned long lfsr = 0xAAAAAAAAUL;

/* Step the shift register once and return the bit that was shifted out.
 *
 * Returns 1 when bit 31 of the state was set before the step, 0 otherwise.
 * When the outgoing bit is 1 the shifted state is XORed with 0x8000001B and
 * cut back to 32 bits; when it is 0 the state is simply shifted left.
 */
int lbit(void)
{
   const int out = (lfsr & 0x80000000UL) != 0;
   unsigned long next = lfsr << 1;

   if (out) {
      next = (next ^ 0x8000001BUL) & 0xFFFFFFFFUL;
   }
   lfsr = next;
   return out;
}
/* TIMFUNC: the tick source used by the timing loops.
 *
 * When one of __i386__, _M_IX86 or _M_AMD64 is defined, TIMFUNC is a function
 * that reads a hardware counter and returns it as a ulong64; the exact
 * instruction depends on the compiler/target macros tested below.  In every
 * other build TIMFUNC is just a macro alias for clock.
 */
#if defined(__i386__) || defined(_M_IX86) || defined(_M_AMD64)
/* RDTSC from Scott Duplichan */
static ulong64 TIMFUNC (void)
{
#if defined __GNUC__
#ifdef __i386__
/* GCC on i386: run rdtsc and hand back the 64-bit value bound to `a`
 * (the "=A" constraint is presumably the edx:eax pair -- see GCC's x86
 * machine constraints). */
ulong64 a;
__asm__ __volatile__ ("rdtsc ":"=A" (a));
return a;
#else /* gcc-IA64 version */
/* Read ar.itc, and read it again for as long as the value, truncated to
 * int, equals -1. */
unsigned long result;
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
while (__builtin_expect ((int) result == -1, 0))
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
return result;
#endif
// Microsoft and Intel Windows compilers
#elif defined _M_IX86
/* NOTE(review): no return statement here; presumably relies on the value
 * rdtsc leaves in registers being taken as the function result -- confirm. */
__asm rdtsc
#elif defined _M_AMD64
return __rdtsc ();
#elif defined _M_IA64
/* NOTE(review): the enclosing #if only admits __i386__/_M_IX86/_M_AMD64, so
 * this branch looks unreachable unless __i386__ is defined together with
 * _M_IA64 -- confirm whether the outer condition should also list _M_IA64. */
#if defined __INTEL_COMPILER
#include <ia64intrin.h>
#endif
return __getReg (3116);
#else
#error need rdtsc function for this build
#endif
}
#else
/* No cycle counter selected: fall back to the C library clock(). */
#define TIMFUNC clock
#endif
/* Run statement x twice back to back.  The timing loops in main() halve the
 * measured interval (">>1") to get a per-call figure, so the repeat count
 * here must stay at two.
 *
 * Wrapped in do { } while (0) so that `DO(...);` is one statement even when
 * it is the body of an unbraced if/else or loop.
 */
#define DO(x) do { x; x; } while (0)
int main(void)
{
ulong64 tt, gg, CLK_PER_SEC;
FILE *log, *logb, *logc;
mp_int a, b, c, d, e, f;
int n, cnt, ix, old_kara_m, old_kara_s;
unsigned rr;
mp_init(&a);
mp_init(&b);
mp_init(&c);
mp_init(&d);
mp_init(&e);
mp_init(&f);
srand(time(NULL));
/* temp. turn off TOOM */
TOOM_MUL_CUTOFF = TOOM_SQR_CUTOFF = 100000;
CLK_PER_SEC = TIMFUNC();
sleep(1);
CLK_PER_SEC = TIMFUNC() - CLK_PER_SEC;
printf("CLK_PER_SEC == %llu\n", CLK_PER_SEC);
log = fopen("logs/add.log", "w");
for (cnt = 8; cnt <= 128; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_add(&a,&b,&c));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 100000);
printf("Adding\t\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, tt); fflush(log);
}
fclose(log);
log = fopen("logs/sub.log", "w");
for (cnt = 8; cnt <= 128; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_sub(&a,&b,&c));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 100000);
printf("Subtracting\t\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, tt); fflush(log);
}
fclose(log);
/* do mult/square twice, first without karatsuba and second with */
old_kara_m = KARATSUBA_MUL_CUTOFF;
old_kara_s = KARATSUBA_SQR_CUTOFF;
for (ix = 0; ix < 1; ix++) {
printf("With%s Karatsuba\n", (ix==0)?"out":"");
KARATSUBA_MUL_CUTOFF = (ix==0)?9999:old_kara_m;
KARATSUBA_SQR_CUTOFF = (ix==0)?9999:old_kara_s;
log = fopen((ix==0)?"logs/mult.log":"logs/mult_kara.log", "w");
for (cnt = 32; cnt <= 288; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_mul(&a, &b, &c));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 100);
printf("Multiplying\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf(log, "%d %9llu\n", mp_count_bits(&a), tt); fflush(log);
}
fclose(log);
log = fopen((ix==0)?"logs/sqr.log":"logs/sqr_kara.log", "w");
for (cnt = 32; cnt <= 288; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_sqr(&a, &b));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 100);
printf("Squaring\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf(log, "%d %9llu\n", mp_count_bits(&a), tt); fflush(log);
}
fclose(log);
}
{
char *primes[] = {
/* 2K moduli mersenne primes */
"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151",
"531137992816767098689588206552468627329593117727031923199444138200403559860852242739162502265229285668889329486246501015346579337652707239409519978766587351943831270835393219031728127",
"10407932194664399081925240327364085538615262247266704805319112350403608059673360298012239441732324184842421613954281007791383566248323464908139906605677320762924129509389220345773183349661583550472959420547689811211693677147548478866962501384438260291732348885311160828538416585028255604666224831890918801847068222203140521026698435488732958028878050869736186900714720710555703168729087",
"1475979915214180235084898622737381736312066145333169775147771216478570297878078949377407337049389289382748507531496480477281264838760259191814463365330269540496961201113430156902396093989090226259326935025281409614983499388222831448598601834318536230923772641390209490231836446899608210795482963763094236630945410832793769905399982457186322944729636418890623372171723742105636440368218459649632948538696905872650486914434637457507280441823676813517852099348660847172579408422316678097670224011990280170474894487426924742108823536808485072502240519452587542875349976558572670229633962575212637477897785501552646522609988869914013540483809865681250419497686697771007",
"259117086013202627776246767922441530941818887553125427303974923161874019266586362086201209516800483406550695241733194177441689509238807017410377709597512042313066624082916353517952311186154862265604547691127595848775610568757931191017711408826252153849035830401185072116424747461823031471398340229288074545677907941037288235820705892351068433882986888616658650280927692080339605869308790500409503709875902119018371991620994002568935113136548829739112656797303241986517250116412703509705427773477972349821676443446668383119322540099648994051790241624056519054483690809616061625743042361721863339415852426431208737266591962061753535748892894599629195183082621860853400937932839420261866586142503251450773096274235376822938649407127700846077124211823080804139298087057504713825264571448379371125032081826126566649084251699453951887789613650248405739378594599444335231188280123660406262468609212150349937584782292237144339628858485938215738821232393687046160677362909315071",
"190797007524439073807468042969529173669356994749940177394741882673528979787005053706368049835514900244303495954950709725762186311224148828811920216904542206960744666169364221195289538436845390250168663932838805192055137154390912666527533007309292687539092257043362517857366624699975402375462954490293259233303137330643531556539739921926201438606439020075174723029056838272505051571967594608350063404495977660656269020823960825567012344189908927956646011998057988548630107637380993519826582389781888135705408653045219655801758081251164080554609057468028203308718724654081055323215860189611391296030471108443146745671967766308925858547271507311563765171008318248647110097614890313562856541784154881743146033909602737947385055355960331855614540900081456378659068370317267696980001187750995491090350108417050917991562167972281070161305972518044872048331306383715094854938415738549894606070722584737978176686422134354526989443028353644037187375385397838259511833166416134323695660367676897722287918773420968982326089026150031515424165462111337527431154890666327374921446276833564519776797633875503548665093914556482031482248883127023777039667707976559857333357013727342079099064400455741830654320379350833236245819348824064783585692924881021978332974949906122664421376034687815350484991",
/* DR moduli */
"14059105607947488696282932836518693308967803494693489478439861164411992439598399594747002144074658928593502845729752797260025831423419686528151609940203368612079",
"101745825697019260773923519755878567461315282017759829107608914364075275235254395622580447400994175578963163918967182013639660669771108475957692810857098847138903161308502419410142185759152435680068435915159402496058513611411688900243039",
"736335108039604595805923406147184530889923370574768772191969612422073040099331944991573923112581267542507986451953227192970402893063850485730703075899286013451337291468249027691733891486704001513279827771740183629161065194874727962517148100775228363421083691764065477590823919364012917984605619526140821797602431",
"38564998830736521417281865696453025806593491967131023221754800625044118265468851210705360385717536794615180260494208076605798671660719333199513807806252394423283413430106003596332513246682903994829528690198205120921557533726473585751382193953592127439965050261476810842071573684505878854588706623484573925925903505747545471088867712185004135201289273405614415899438276535626346098904241020877974002916168099951885406379295536200413493190419727789712076165162175783",
"542189391331696172661670440619180536749994166415993334151601745392193484590296600979602378676624808129613777993466242203025054573692562689251250471628358318743978285860720148446448885701001277560572526947619392551574490839286458454994488665744991822837769918095117129546414124448777033941223565831420390846864429504774477949153794689948747680362212954278693335653935890352619041936727463717926744868338358149568368643403037768649616778526013610493696186055899318268339432671541328195724261329606699831016666359440874843103020666106568222401047720269951530296879490444224546654729111504346660859907296364097126834834235287147",
"1487259134814709264092032648525971038895865645148901180585340454985524155135260217788758027400478312256339496385275012465661575576202252063145698732079880294664220579764848767704076761853197216563262660046602703973050798218246170835962005598561669706844469447435461092542265792444947706769615695252256130901271870341005768912974433684521436211263358097522726462083917939091760026658925757076733484173202927141441492573799914240222628795405623953109131594523623353044898339481494120112723445689647986475279242446083151413667587008191682564376412347964146113898565886683139407005941383669325997475076910488086663256335689181157957571445067490187939553165903773554290260531009121879044170766615232300936675369451260747671432073394867530820527479172464106442450727640226503746586340279816318821395210726268291535648506190714616083163403189943334431056876038286530365757187367147446004855912033137386225053275419626102417236133948503",
"1095121115716677802856811290392395128588168592409109494900178008967955253005183831872715423151551999734857184538199864469605657805519106717529655044054833197687459782636297255219742994736751541815269727940751860670268774903340296040006114013971309257028332849679096824800250742691718610670812374272414086863715763724622797509437062518082383056050144624962776302147890521249477060215148275163688301275847155316042279405557632639366066847442861422164832655874655824221577849928863023018366835675399949740429332468186340518172487073360822220449055340582568461568645259954873303616953776393853174845132081121976327462740354930744487429617202585015510744298530101547706821590188733515880733527449780963163909830077616357506845523215289297624086914545378511082534229620116563260168494523906566709418166011112754529766183554579321224940951177394088465596712620076240067370589036924024728375076210477267488679008016579588696191194060127319035195370137160936882402244399699172017835144537488486396906144217720028992863941288217185353914991583400421682751000603596655790990815525126154394344641336397793791497068253936771017031980867706707490224041075826337383538651825493679503771934836094655802776331664261631740148281763487765852746577808019633679",
/* generic unrestricted moduli */
"17933601194860113372237070562165128350027320072176844226673287945873370751245439587792371960615073855669274087805055507977323024886880985062002853331424203",
"2893527720709661239493896562339544088620375736490408468011883030469939904368086092336458298221245707898933583190713188177399401852627749210994595974791782790253946539043962213027074922559572312141181787434278708783207966459019479487",
"347743159439876626079252796797422223177535447388206607607181663903045907591201940478223621722118173270898487582987137708656414344685816179420855160986340457973820182883508387588163122354089264395604796675278966117567294812714812796820596564876450716066283126720010859041484786529056457896367683122960411136319",
"47266428956356393164697365098120418976400602706072312735924071745438532218237979333351774907308168340693326687317443721193266215155735814510792148768576498491199122744351399489453533553203833318691678263241941706256996197460424029012419012634671862283532342656309677173602509498417976091509154360039893165037637034737020327399910409885798185771003505320583967737293415979917317338985837385734747478364242020380416892056650841470869294527543597349250299539682430605173321029026555546832473048600327036845781970289288898317888427517364945316709081173840186150794397479045034008257793436817683392375274635794835245695887",
"436463808505957768574894870394349739623346440601945961161254440072143298152040105676491048248110146278752857839930515766167441407021501229924721335644557342265864606569000117714935185566842453630868849121480179691838399545644365571106757731317371758557990781880691336695584799313313687287468894148823761785582982549586183756806449017542622267874275103877481475534991201849912222670102069951687572917937634467778042874315463238062009202992087620963771759666448266532858079402669920025224220613419441069718482837399612644978839925207109870840278194042158748845445131729137117098529028886770063736487420613144045836803985635654192482395882603511950547826439092832800532152534003936926017612446606135655146445620623395788978726744728503058670046885876251527122350275750995227",
"11424167473351836398078306042624362277956429440521137061889702611766348760692206243140413411077394583180726863277012016602279290144126785129569474909173584789822341986742719230331946072730319555984484911716797058875905400999504305877245849119687509023232790273637466821052576859232452982061831009770786031785669030271542286603956118755585683996118896215213488875253101894663403069677745948305893849505434201763745232895780711972432011344857521691017896316861403206449421332243658855453435784006517202894181640562433575390821384210960117518650374602256601091379644034244332285065935413233557998331562749140202965844219336298970011513882564935538704289446968322281451907487362046511461221329799897350993370560697505809686438782036235372137015731304779072430260986460269894522159103008260495503005267165927542949439526272736586626709581721032189532726389643625590680105784844246152702670169304203783072275089194754889511973916207",
"1214855636816562637502584060163403830270705000634713483015101384881871978446801224798536155406895823305035467591632531067547890948695117172076954220727075688048751022421198712032848890056357845974246560748347918630050853933697792254955890439720297560693579400297062396904306270145886830719309296352765295712183040773146419022875165382778007040109957609739589875590885701126197906063620133954893216612678838507540777138437797705602453719559017633986486649523611975865005712371194067612263330335590526176087004421363598470302731349138773205901447704682181517904064735636518462452242791676541725292378925568296858010151852326316777511935037531017413910506921922450666933202278489024521263798482237150056835746454842662048692127173834433089016107854491097456725016327709663199738238442164843147132789153725513257167915555162094970853584447993125488607696008169807374736711297007473812256272245489405898470297178738029484459690836250560495461579533254473316340608217876781986188705928270735695752830825527963838355419762516246028680280988020401914551825487349990306976304093109384451438813251211051597392127491464898797406789175453067960072008590614886532333015881171367104445044718144312416815712216611576221546455968770801413440778423979",
NULL
};
log = fopen("logs/expt.log", "w");
logb = fopen("logs/expt_dr.log", "w");
logc = fopen("logs/expt_2k.log", "w");
for (n = 0; primes[n]; n++) {
SLEEP;
mp_read_radix(&a, primes[n], 10);
mp_zero(&b);
for (rr = 0; rr < (unsigned)mp_count_bits(&a); rr++) {
mp_mul_2(&b, &b);
b.dp[0] |= lbit();
b.used += 1;
}
mp_sub_d(&a, 1, &c);
mp_mod(&b, &c, &b);
mp_set(&c, 3);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_exptmod(&c, &b, &a, &d));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 10);
mp_sub_d(&a, 1, &e);
mp_sub(&e, &b, &b);
mp_exptmod(&c, &b, &a, &e); /* c^(p-1-b) mod a */
mp_mulmod(&e, &d, &a, &d); /* c^b * c^(p-1-b) == c^p-1 == 1 */
if (mp_cmp_d(&d, 1)) {
printf("Different (%d)!!!\n", mp_count_bits(&a));
draw(&d);
exit(0);
}
printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf((n < 6) ? logc : (n < 13) ? logb : log, "%d %9llu\n", mp_count_bits(&a), tt);
}
}
fclose(log);
fclose(logb);
fclose(logc);
log = fopen("logs/invmod.log", "w");
for (cnt = 4; cnt <= 128; cnt += 4) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
do {
mp_add_d(&b, 1, &b);
mp_gcd(&a, &b, &c);
} while (mp_cmp_d(&c, 1) != MP_EQ);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_invmod(&b, &a, &c));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 1000);
mp_mulmod(&b, &c, &a, &d);
if (mp_cmp_d(&d, 1) != MP_EQ) {
printf("Failed to invert\n");
return 0;
}
printf("Inverting mod\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, tt);
}
fclose(log);
return 0;
}

View File

@ -46,4 +46,5 @@ mont: mont.o
clean:
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime pprime.dat
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime pprime.dat \
*.da *.dyn *.dpi *~

67
etc/makefile.icc Normal file
View File

@ -0,0 +1,67 @@
CC = icc
CFLAGS += -I../
# optimize for SPEED
#
# -mcpu= can be pentium, pentiumpro (covers PII through PIII) or pentium4
# -ax? specifies make code specifically for ? but compatible with IA-32
# -x? specifies compile solely for ? [not specifically IA-32 compatible]
#
# where ? is
# K - PIII
# W - first P4 [Willamette]
# N - P4 Northwood
# P - P4 Prescott
# B - Blend of P4 and PM [mobile]
#
# Default to just generic max opts
CFLAGS += -O3 -xN -ip
# default lib name (requires install with root)
# LIBNAME=-ltommath
# libname when you can't install the lib with install
LIBNAME=../libtommath.a
#provable primes
pprime: pprime.o
$(CC) pprime.o $(LIBNAME) -o pprime
# portable [well requires clock()] tuning app
tune: tune.o
$(CC) tune.o $(LIBNAME) -o tune
# same app but using RDTSC for higher precision [requires 80586+], coff based gcc installs [e.g. ming, cygwin, djgpp]
tune86: tune.c
nasm -f coff timer.asm
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86
# for cygwin
tune86c: tune.c
nasm -f gnuwin32 timer.asm
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86
#make tune86 for linux or any ELF format
tune86l: tune.c
nasm -f elf -DUSE_ELF timer.asm
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86l
# spits out mersenne primes
mersenne: mersenne.o
$(CC) mersenne.o $(LIBNAME) -o mersenne
# finds DR safe primes for the given config
drprime: drprime.o
$(CC) drprime.o $(LIBNAME) -o drprime
# finds 2k safe primes for the given config
2kprime: 2kprime.o
$(CC) 2kprime.o $(LIBNAME) -o 2kprime
mont: mont.o
$(CC) mont.o $(LIBNAME) -o mont
clean:
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime pprime.dat *.il

View File

@ -1,16 +1,16 @@
224 20297071
448 15151383
672 13088682
896 11111587
1120 9240621
1344 8221878
1568 7227434
1792 6718051
2016 6042524
2240 5685200
2464 5240465
2688 4818032
2912 4412794
3136 4155883
3360 3927078
3584 3722138
224 1572
448 1740
672 1902
896 2116
1120 2324
1344 2484
1568 2548
1792 2772
2016 2958
2240 3058
2464 3276
2688 3436
2912 3542
3136 3702
3360 3926
3584 4074

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.9 KiB

After

Width:  |  Height:  |  Size: 6.1 KiB

View File

@ -1,7 +1,7 @@
513 745
769 282
1025 130
2049 20
2561 11
3073 6
4097 2
513 19933908
769 55707832
1025 119872576
2049 856114218
2561 1602741360
3073 2718192748
4097 6264335828

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.3 KiB

After

Width:  |  Height:  |  Size: 6.5 KiB

View File

@ -1,6 +1,6 @@
521 783
607 585
1279 138
2203 39
3217 15
4253 6
521 18847776
607 24665920
1279 110036220
2203 414562036
3217 1108350966
4253 2286079370

View File

@ -1,7 +1,7 @@
532 1296
784 551
1036 283
1540 109
2072 52
3080 18
4116 7
532 9656134
784 23022274
1036 45227854
1540 129652848
2072 280625626
3080 845619480
4116 1866206400

View File

@ -1,17 +1,17 @@
set terminal png
set size 1.75
set ylabel "Operations per Second"
set xlabel "Operand size (bits)"
set output "addsub.png"
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
set output "mult.png"
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
set output "expt.png"
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)", 'expt_2k.log' smooth bezier title "Exptmod (2k Reduction)"
set output "invmod.png"
plot 'invmod.log' smooth bezier title "Modular Inverse"
set terminal png
set size 1.75
set ylabel "Cycles per Operation"
set xlabel "Operand size (bits)"
set output "addsub.png"
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
set output "mult.png"
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
set output "expt.png"
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)", 'expt_2k.log' smooth bezier title "Exptmod (2k Reduction)"
set output "invmod.png"
plot 'invmod.log' smooth bezier title "Modular Inverse"

View File

@ -1,32 +0,0 @@
112 17364
224 8643
336 8867
448 6228
560 4737
672 2259
784 2899
896 1497
1008 1238
1120 1010
1232 870
1344 1265
1456 1102
1568 981
1680 539
1792 484
1904 722
2016 392
2128 604
2240 551
2352 511
2464 469
2576 263
2688 247
2800 227
2912 354
3024 336
3136 312
3248 296
3360 166
3472 155
3584 248

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.6 KiB

After

Width:  |  Height:  |  Size: 4.8 KiB

View File

@ -1,13 +0,0 @@
To use the pretty graphs you have to first build/run the ltmtest from the root directory of the package.
To do this, type
make timing ; ltmtest
in the root. It will run for a while [about ten minutes on most PCs] and produce a series of .log files in logs/.
After doing that, run "gnuplot graphs.dem" to make the PNGs. If you have managed to do all that so far, just open index.html to view
them all :-)
Have fun
Tom

View File

@ -1,16 +0,0 @@
224 11069160
448 9156136
672 8089755
896 7399424
1120 6389352
1344 5818648
1568 5257112
1792 4982160
2016 4527856
2240 4325312
2464 4051760
2688 3767640
2912 3612520
3136 3415208
3360 3258656
3584 3113360

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.8 KiB

View File

@ -1,7 +0,0 @@
513 664
769 256
1025 117
2049 17
2561 9
3073 5
4097 2

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.3 KiB

View File

@ -1,7 +0,0 @@
532 1088
784 460
1036 240
1540 92
2072 43
3080 15
4116 6

View File

@ -1,17 +0,0 @@
set terminal png color
set size 1.75
set ylabel "Operations per Second"
set xlabel "Operand size (bits)"
set output "addsub.png"
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
set output "mult.png"
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
set output "expt.png"
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)"
set output "invmod.png"
plot 'invmod.log' smooth bezier title "Modular Inverse"

View File

@ -1,24 +0,0 @@
<html>
<head>
<title>LibTomMath Log Plots</title>
</head>
<body>
<h1>Addition and Subtraction</h1>
<center><img src=addsub.png></center>
<hr>
<h1>Multipliers</h1>
<center><img src=mult.png></center>
<hr>
<h1>Exptmod</h1>
<center><img src=expt.png></center>
<hr>
<h1>Modular Inverse</h1>
<center><img src=invmod.png></center>
<hr>
</body>
</html>

View File

@ -1,32 +0,0 @@
112 16248
224 8192
336 5320
448 3560
560 2728
672 2064
784 1704
896 2176
1008 1184
1120 976
1232 1280
1344 1176
1456 624
1568 912
1680 504
1792 452
1904 658
2016 608
2128 336
2240 312
2352 288
2464 264
2576 408
2688 376
2800 354
2912 198
3024 307
3136 173
3248 162
3360 256
3472 145
3584 226

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.6 KiB

View File

@ -1,17 +0,0 @@
896 322904
1344 151592
1792 90472
2240 59984
2688 42624
3136 31872
3584 24704
4032 19704
4480 16096
4928 13376
5376 11272
5824 9616
6272 8360
6720 7304
7168 1664
7616 1472
8064 1328

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.1 KiB

View File

@ -1,17 +0,0 @@
896 322872
1344 151688
1792 90480
2240 59984
2688 42656
3136 32144
3584 25840
4032 21328
4480 17856
4928 14928
5376 12856
5824 11256
6272 9880
6720 8984
7168 7928
7616 7200
8064 6576

View File

@ -1,17 +0,0 @@
896 415472
1344 223736
1792 141232
2240 97624
2688 71400
3136 54800
3584 16904
4032 13528
4480 10968
4928 9128
5376 7784
5824 6672
6272 5760
6720 5056
7168 4440
7616 3952
8064 3512

View File

@ -1,17 +0,0 @@
896 420464
1344 224800
1792 142808
2240 97704
2688 71416
3136 54504
3584 38320
4032 32360
4480 27576
4928 23840
5376 20688
5824 18264
6272 16176
6720 14440
7168 11688
7616 10752
8064 9936

View File

@ -1,16 +0,0 @@
224 9728504
448 8573648
672 7488096
896 6714064
1120 5950472
1344 5457400
1568 5038896
1792 4683632
2016 4384656
2240 4105976
2464 3871608
2688 3650680
2912 3463552
3136 3290016
3360 3135272
3584 2993848

View File

@ -1,33 +1,33 @@
920 374785
1142 242737
1371 176704
1596 134341
1816 105537
2044 85089
2268 70051
2490 58671
2716 49851
2937 42881
3162 37288
3387 32697
3608 28915
3836 25759
4057 23088
4284 20800
4508 18827
4730 17164
4956 15689
5180 14397
5398 13260
5628 12249
5852 11346
6071 10537
6298 9812
6522 9161
6742 8572
6971 8038
7195 2915
7419 2744
7644 2587
7866 2444
8090 2311
923 45612
1143 68010
1370 94894
1596 126514
1820 163014
2044 203564
2268 249156
2492 299226
2716 354138
2940 413022
3163 477406
3387 545876
3612 619044
3835 696754
4060 779174
4284 866216
4508 958100
4731 1055898
4954 1162294
5179 1267654
5404 1377572
5628 1503736
5852 1622310
6076 1746624
6299 1875390
6524 2009086
6748 2145990
6971 2289044
7196 2891644
7418 3064792
7644 3249780
7868 3455868
8092 3644238

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.9 KiB

After

Width:  |  Height:  |  Size: 6.6 KiB

View File

@ -1,33 +1,33 @@
924 374171
1147 243163
1371 177111
1596 134465
1819 105619
2044 85145
2266 70086
2488 58717
2715 49869
2939 42894
3164 37389
3387 33510
3610 29993
3836 27205
4060 24751
4281 22576
4508 20670
4732 19019
4954 17527
5180 16217
5404 15044
5624 14003
5849 13051
6076 12067
6300 11438
6524 10772
6748 10298
6972 9715
7195 9330
7416 8836
7644 8465
7864 8042
8091 7735
921 92388
1148 61410
1372 43799
1594 33047
1819 26913
2043 21996
2268 18453
2492 15623
2715 13378
2940 11626
3164 10252
3385 9291
3610 8348
3835 7615
4060 6928
4283 6401
4508 5836
4732 5387
4955 4985
5178 4614
5404 4300
5622 4005
5852 3742
6073 3502
6298 3262
6524 3137
6748 2967
6971 2807
7195 2679
7420 2571
7643 2442
7867 2324
8091 2235

View File

@ -1,13 +0,0 @@
To use the pretty graphs you have to first build/run the ltmtest from the root directory of the package.
To do this, type
make timing ; ltmtest
in the root. It will run for a while [about ten minutes on most PCs] and produce a series of .log files in logs/.
After doing that, run "gnuplot graphs.dem" to make the PNGs. If you have managed to do all that so far, just open index.html to view
them all :-)
Have fun
Tom

View File

@ -1,16 +0,0 @@
224 8113248
448 6585584
672 5687678
896 4761144
1120 4111592
1344 3995154
1568 3532387
1792 3225400
2016 2963960
2240 2720112
2464 2533952
2688 2307168
2912 2287064
3136 2150160
3360 2035992
3584 1936304

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.7 KiB

View File

@ -1,7 +0,0 @@
513 195
769 68
1025 31
2049 4
2561 2
3073 1
4097 0

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.4 KiB

View File

@ -1,7 +0,0 @@
532 393
784 158
1036 79
1540 27
2072 12
3080 4
4116 1

View File

@ -1,17 +0,0 @@
set terminal png color
set size 1.75
set ylabel "Operations per Second"
set xlabel "Operand size (bits)"
set output "addsub.png"
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
set output "mult.png"
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
set output "expt.png"
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)"
set output "invmod.png"
plot 'invmod.log' smooth bezier title "Modular Inverse"

View File

@ -1,24 +0,0 @@
<html>
<head>
<title>LibTomMath Log Plots</title>
</head>
<body>
<h1>Addition and Subtraction</h1>
<center><img src=addsub.png></center>
<hr>
<h1>Multipliers</h1>
<center><img src=mult.png></center>
<hr>
<h1>Exptmod</h1>
<center><img src=expt.png></center>
<hr>
<h1>Modular Inverse</h1>
<center><img src=invmod.png></center>
<hr>
</body>
</html>

View File

@ -1,32 +0,0 @@
112 13608
224 6872
336 4264
448 2792
560 2144
672 1560
784 1296
896 1672
1008 896
1120 736
1232 1024
1344 888
1456 472
1568 680
1680 373
1792 328
1904 484
2016 436
2128 232
2240 211
2352 200
2464 177
2576 293
2688 262
2800 251
2912 137
3024 216
3136 117
3248 113
3360 181
3472 98
3584 158

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.5 KiB

View File

@ -1,17 +0,0 @@
896 77600
1344 35776
1792 19688
2240 13248
2688 9424
3136 7056
3584 5464
4032 4368
4480 3568
4928 2976
5376 2520
5824 2152
6272 1872
6720 1632
7168 650
7616 576
8064 515

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.9 KiB

View File

@ -1,17 +0,0 @@
896 77752
1344 35832
1792 19688
2240 14704
2688 10832
3136 8336
3584 6600
4032 5424
4480 4648
4928 3976
5376 3448
5824 3016
6272 2664
6720 2384
7168 2120
7616 1912
8064 1752

View File

@ -1,17 +0,0 @@
896 128088
1344 63640
1792 37968
2240 25488
2688 18176
3136 13672
3584 4920
4032 3912
4480 3160
4928 2616
5376 2216
5824 1896
6272 1624
6720 1408
7168 1240
7616 1096
8064 984

View File

@ -1,17 +0,0 @@
896 127456
1344 63752
1792 37920
2240 25440
2688 18200
3136 13728
3584 10968
4032 9072
4480 7608
4928 6440
5376 5528
5824 4768
6272 4328
6720 3888
7168 3504
7616 3176
8064 2896

View File

@ -1,16 +0,0 @@
224 7355896
448 6162880
672 5218984
896 4622776
1120 3999320
1344 3629480
1568 3290384
1792 2954752
2016 2737056
2240 2563320
2464 2451928
2688 2310920
2912 2139048
3136 2034080
3360 1890800
3584 1808624

View File

@ -1,33 +1,33 @@
922 471095
1147 337137
1366 254327
1596 199732
1819 161225
2044 132852
2268 111493
2490 94864
2715 81745
2940 71187
3162 62575
3387 55418
3612 14540
3836 12944
4060 11627
4281 10546
4508 9502
4730 8688
4954 7937
5180 7273
5402 6701
5627 6189
5850 5733
6076 5310
6300 4933
6522 4631
6748 4313
6971 4064
7196 3801
7420 3576
7642 3388
7868 3191
8092 3020
924 26026
1146 37682
1370 51714
1595 68130
1820 86850
2043 107880
2267 131236
2490 156828
2716 184704
2940 214934
3162 247424
3388 282494
3608 308390
3834 345978
4060 386156
4282 427648
4505 471556
4731 517948
4954 566396
5180 618292
5402 670130
5628 725674
5852 783310
6076 843480
6300 905136
6524 969132
6748 1033680
6971 1100912
7195 1170954
7420 1252576
7643 1325038
7867 1413890
8091 1493140

View File

@ -1,33 +1,33 @@
922 470930
1148 337217
1372 254433
1596 199827
1820 161204
2043 132871
2267 111522
2488 94932
2714 81814
2939 71231
3164 62616
3385 55467
3611 44426
3836 40695
4060 37391
4283 34371
4508 31779
4732 29499
4956 27426
5177 25598
5403 23944
5628 22416
5851 21052
6076 19781
6299 18588
6523 17539
6746 16618
6972 15705
7196 13582
7420 13004
7643 12496
7868 11963
8092 11497
923 165854
1146 112539
1372 80388
1595 60051
1820 47498
2044 38017
2268 31935
2492 27373
2714 23798
2939 20630
3164 18198
3388 16191
3612 14538
3836 13038
4058 11683
4284 10915
4508 9998
4731 9271
4954 8555
5180 7910
5404 7383
5628 7012
5852 6527
6075 6175
6299 5737
6524 5398
6744 5110
6971 4864
7196 4567
7420 4371
7644 4182
7868 3981
8092 3758

View File

@ -1,16 +1,16 @@
224 16370431
448 13327848
672 11009401
896 9125342
1120 7930419
1344 7114040
1568 6506998
1792 5899346
2016 5435327
2240 5038931
2464 4696364
2688 4425678
2912 4134476
3136 3913280
3360 3692536
3584 3505219
224 2012
448 2208
672 2366
896 2532
1120 2682
1344 2838
1568 3016
1792 3146
2016 3318
2240 3538
2464 3756
2688 3914
2912 4060
3136 4216
3360 4392
3584 4550

View File

@ -12,7 +12,10 @@ CFLAGS += -O3 -funroll-loops
#x86 optimizations [should be valid for any GCC install though]
CFLAGS += -fomit-frame-pointer
VERSION=0.30
#debug
#CFLAGS += -g3
VERSION=0.31
default: libtommath.a
@ -20,7 +23,7 @@ default: libtommath.a
LIBNAME=libtommath.a
HEADERS=tommath.h
#LIBPATH-The directory for libtomcrypt to be installed to.
#LIBPATH-The directory for libtommath to be installed to.
#INCPATH-The directory to install the header files for libtommath.
#DATAPATH-The directory to install the pdf docs.
DESTDIR=
@ -58,6 +61,30 @@ libtommath.a: $(OBJECTS)
$(AR) $(ARFLAGS) libtommath.a $(OBJECTS)
ranlib libtommath.a
#make a profiled library (takes a while!!!)
#
# This will build the library with profile generation
# then run the test demo and rebuild the library.
#
# So far I've seen improvements in the MP math
profiled:
make CFLAGS="$(CFLAGS) -fprofile-arcs -DTESTING" timing
./ltmtest
rm -f *.a *.o ltmtest
make CFLAGS="$(CFLAGS) -fbranch-probabilities"
#make a single object profiled library
profiled_single:
perl gen.pl
$(CC) $(CFLAGS) -fprofile-arcs -DTESTING -c mpi.c -o mpi.o
$(CC) $(CFLAGS) -DTESTING -DTIMER demo/timing.c mpi.o -o ltmtest
./ltmtest
rm -f *.o ltmtest
$(CC) $(CFLAGS) -fbranch-probabilities -DTESTING -c mpi.c -o mpi.o
$(AR) $(ARFLAGS) libtommath.a mpi.o
ranlib libtommath.a
install: libtommath.a
install -d -g root -o root $(DESTDIR)$(LIBPATH)
install -d -g root -o root $(DESTDIR)$(INCPATH)
@ -71,7 +98,7 @@ mtest: test
cd mtest ; $(CC) $(CFLAGS) mtest.c -o mtest -s
timing: libtommath.a
$(CC) $(CFLAGS) -DTIMER demo/demo.c libtommath.a -o ltmtest -s
$(CC) $(CFLAGS) -DTIMER demo/timing.c libtommath.a -o ltmtest -s
# makes the LTM book DVI file, requires tetex, perl and makeindex [part of tetex I think]
docdvi: tommath.src
@ -106,10 +133,13 @@ mandvi: bn.tex
manual: mandvi
pdflatex bn >/dev/null
rm -f bn.aux bn.dvi bn.log bn.idx bn.lof bn.out bn.toc
pretty:
perl pretty.build
clean:
rm -f *.bat *.pdf *.o *.a *.obj *.lib *.exe *.dll etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \
*.idx *.toc *.log *.aux *.dvi *.lof *.ind *.ilg *.ps *.log *.s mpi.c
*.idx *.toc *.log *.aux *.dvi *.lof *.ind *.ilg *.ps *.log *.s mpi.c *.da *.dyn *.dpi tommath.tex *~ demo/*~ etc/*~
cd etc ; make clean
cd pics ; make clean

View File

@ -30,7 +30,8 @@ bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj \
bn_mp_radix_smap.obj bn_mp_read_radix.obj bn_mp_toradix.obj bn_mp_radix_size.obj \
bn_mp_fread.obj bn_mp_fwrite.obj bn_mp_cnt_lsb.obj bn_error.obj \
bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_prime_sizes_tab.obj bn_mp_exteuclid.obj bn_mp_toradix_n.obj \
bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj
bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj \
bn_mp_init_set.obj bn_mp_init_set_int.obj
TARGET = libtommath.lib

View File

@ -35,7 +35,8 @@ bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \
bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \
bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \
bn_mp_init_multi.o bn_mp_clear_multi.o bn_prime_sizes_tab.o bn_mp_exteuclid.o bn_mp_toradix_n.o \
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \
bn_mp_init_set_int.o
# make a Windows DLL via Cygwin
windll: $(OBJECTS)

110
makefile.icc Normal file
View File

@ -0,0 +1,110 @@
#Makefile for ICC
#
#Tom St Denis
CC=icc
CFLAGS += -I./
# optimize for SPEED
#
# -mcpu= can be pentium, pentiumpro (covers PII through PIII) or pentium4
# -ax? specifies make code specifically for ? but compatible with IA-32
# -x? specifies compile solely for ? [not specifically IA-32 compatible]
#
# where ? is
# K - PIII
# W - first P4 [Willamette]
# N - P4 Northwood
# P - P4 Prescott
# B - Blend of P4 and PM [mobile]
#
# Default to just generic max opts
CFLAGS += -O3 -xN
default: libtommath.a
#default files to install
LIBNAME=libtommath.a
HEADERS=tommath.h
#LIBPATH-The directory for libtommath to be installed to.
#INCPATH-The directory to install the header files for libtommath.
#DATAPATH-The directory to install the pdf docs.
DESTDIR=
LIBPATH=/usr/lib
INCPATH=/usr/include
DATAPATH=/usr/share/doc/libtommath/pdf
OBJECTS=bncore.o bn_mp_init.o bn_mp_clear.o bn_mp_exch.o bn_mp_grow.o bn_mp_shrink.o \
bn_mp_clamp.o bn_mp_zero.o bn_mp_set.o bn_mp_set_int.o bn_mp_init_size.o bn_mp_copy.o \
bn_mp_init_copy.o bn_mp_abs.o bn_mp_neg.o bn_mp_cmp_mag.o bn_mp_cmp.o bn_mp_cmp_d.o \
bn_mp_rshd.o bn_mp_lshd.o bn_mp_mod_2d.o bn_mp_div_2d.o bn_mp_mul_2d.o bn_mp_div_2.o \
bn_mp_mul_2.o bn_s_mp_add.o bn_s_mp_sub.o bn_fast_s_mp_mul_digs.o bn_s_mp_mul_digs.o \
bn_fast_s_mp_mul_high_digs.o bn_s_mp_mul_high_digs.o bn_fast_s_mp_sqr.o bn_s_mp_sqr.o \
bn_mp_add.o bn_mp_sub.o bn_mp_karatsuba_mul.o bn_mp_mul.o bn_mp_karatsuba_sqr.o \
bn_mp_sqr.o bn_mp_div.o bn_mp_mod.o bn_mp_add_d.o bn_mp_sub_d.o bn_mp_mul_d.o \
bn_mp_div_d.o bn_mp_mod_d.o bn_mp_expt_d.o bn_mp_addmod.o bn_mp_submod.o \
bn_mp_mulmod.o bn_mp_sqrmod.o bn_mp_gcd.o bn_mp_lcm.o bn_fast_mp_invmod.o bn_mp_invmod.o \
bn_mp_reduce.o bn_mp_montgomery_setup.o bn_fast_mp_montgomery_reduce.o bn_mp_montgomery_reduce.o \
bn_mp_exptmod_fast.o bn_mp_exptmod.o bn_mp_2expt.o bn_mp_n_root.o bn_mp_jacobi.o bn_reverse.o \
bn_mp_count_bits.o bn_mp_read_unsigned_bin.o bn_mp_read_signed_bin.o bn_mp_to_unsigned_bin.o \
bn_mp_to_signed_bin.o bn_mp_unsigned_bin_size.o bn_mp_signed_bin_size.o \
bn_mp_xor.o bn_mp_and.o bn_mp_or.o bn_mp_rand.o bn_mp_montgomery_calc_normalization.o \
bn_mp_prime_is_divisible.o bn_prime_tab.o bn_mp_prime_fermat.o bn_mp_prime_miller_rabin.o \
bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o \
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o bn_mp_reduce_setup.o \
bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \
bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \
bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \
bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \
bn_mp_init_multi.o bn_mp_clear_multi.o bn_prime_sizes_tab.o bn_mp_exteuclid.o bn_mp_toradix_n.o \
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \
bn_mp_init_set_int.o
libtommath.a: $(OBJECTS)
$(AR) $(ARFLAGS) libtommath.a $(OBJECTS)
ranlib libtommath.a
#make a profiled library (takes a while!!!)
#
# This will build the library with profile generation
# then run the test demo and rebuild the library.
#
# So far I've seen improvements in the MP math
# Two-pass profile-guided build:
#   1. build the library and the timing demo with ICC profile generation,
#   2. run the demo to collect profile data,
#   3. discard the instrumented objects and rebuild using the profile.
# $(MAKE) is used (not a bare "make") so the sub-makes inherit the parent's
# flags and jobserver, and so "make -n" is honoured.
profiled:
	$(MAKE) -f makefile.icc CFLAGS="$(CFLAGS) -prof_gen -DTESTING" timing
	./ltmtest
	rm -f *.a *.o ltmtest
	$(MAKE) -f makefile.icc CFLAGS="$(CFLAGS) -prof_use"
#make a single object profiled library
# Profile-guided build of the single-file library (mpi.c, produced by gen.pl).
# The training run uses demo/timing.c, the same program the "timing" target
# builds as ltmtest, so the profile reflects the timing workload.
profiled_single:
	perl gen.pl
	$(CC) $(CFLAGS) -prof_gen -DTESTING -c mpi.c -o mpi.o
	$(CC) $(CFLAGS) -DTESTING -DTIMER demo/timing.c mpi.o -o ltmtest
	./ltmtest
	rm -f *.o ltmtest
	$(CC) $(CFLAGS) -prof_use -ip -DTESTING -c mpi.c -o mpi.o
	$(AR) $(ARFLAGS) libtommath.a mpi.o
	ranlib libtommath.a
install: libtommath.a
install -d -g root -o root $(DESTDIR)$(LIBPATH)
install -d -g root -o root $(DESTDIR)$(INCPATH)
install -g root -o root $(LIBNAME) $(DESTDIR)$(LIBPATH)
install -g root -o root $(HEADERS) $(DESTDIR)$(INCPATH)
test: libtommath.a demo/demo.o
$(CC) demo/demo.o libtommath.a -o test
mtest: test
cd mtest ; $(CC) $(CFLAGS) mtest.c -o mtest
timing: libtommath.a
$(CC) $(CFLAGS) -DTIMER demo/timing.c libtommath.a -o ltmtest
# Remove build products, LaTeX by-products and ICC profile data (*.il, *.dyn),
# then clean the sub-directories.
# "cd dir && $(MAKE) clean" is deliberate: with ";" a failed cd would run
# "make clean" in this directory again and recurse.
clean:
	rm -f *.bat *.pdf *.o *.a *.obj *.lib *.exe *.dll etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \
	*.idx *.toc *.log *.aux *.dvi *.lof *.ind *.ilg *.ps *.s mpi.c *.il etc/*.il *.dyn
	cd etc && $(MAKE) clean
	cd pics && $(MAKE) clean

View File

@ -29,7 +29,8 @@ bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj \
bn_mp_radix_smap.obj bn_mp_read_radix.obj bn_mp_toradix.obj bn_mp_radix_size.obj \
bn_mp_fread.obj bn_mp_fwrite.obj bn_mp_cnt_lsb.obj bn_error.obj \
bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_prime_sizes_tab.obj bn_mp_exteuclid.obj bn_mp_toradix_n.obj \
bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj
bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj \
bn_mp_init_set.obj bn_mp_init_set_int.obj
library: $(OBJECTS)
lib /out:tommath.lib $(OBJECTS)

Binary file not shown.

View File

@ -452,7 +452,7 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
}
/* setup dest */
olduse = c->used;
olduse = c->used;
c->used = digs;
{
@ -779,7 +779,7 @@ mp_2expt (mp_int * a, int b)
a->used = b / DIGIT_BIT + 1;
/* put the single bit in its place */
a->dp[b / DIGIT_BIT] = 1 << (b % DIGIT_BIT);
a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
return MP_OKAY;
}
@ -1142,10 +1142,14 @@ mp_clamp (mp_int * a)
void
mp_clear (mp_int * a)
{
int i;
/* only do anything if a hasn't been freed previously */
if (a->dp != NULL) {
/* first zero the digits */
memset (a->dp, 0, sizeof (mp_digit) * a->used);
for (i = 0; i < a->used; i++) {
a->dp[i] = 0;
}
/* free ram */
XFREE(a->dp);
@ -1677,7 +1681,7 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
*/
/* get sign before writing to c */
x.sign = a->sign;
x.sign = x.used == 0 ? MP_ZPOS : a->sign;
if (c != NULL) {
mp_clamp (&q);
@ -3083,15 +3087,22 @@ int mp_grow (mp_int * a, int size)
*/
#include <tommath.h>
/* init a new bigint */
/* init a new mp_int */
int mp_init (mp_int * a)
{
int i;
/* allocate memory required and clear it */
a->dp = OPT_CAST(mp_digit) XCALLOC (sizeof (mp_digit), MP_PREC);
a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
if (a->dp == NULL) {
return MP_MEM;
}
/* set the digits to zero */
for (i = 0; i < MP_PREC; i++) {
a->dp[i] = 0;
}
/* set the used to zero, allocated digits to the default precision
* and sign to positive */
a->used = 0;
@ -3753,9 +3764,6 @@ int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
goto X0Y0;
/* now shift the digits */
x0.sign = x1.sign = a->sign;
y0.sign = y1.sign = b->sign;
x0.used = y0.used = B;
x1.used = a->used - B;
y1.used = b->used - B;
@ -4484,7 +4492,7 @@ int mp_mul (mp_int * a, mp_int * b, mp_int * c)
res = s_mp_mul (a, b, c);
}
}
c->sign = neg;
c->sign = (c->used > 0) ? neg : MP_ZPOS;
return res;
}
@ -6090,7 +6098,8 @@ mp_reduce_2k_setup(mp_int *a, mp_digit *d)
/* determines if mp_reduce_2k can be used */
int mp_reduce_is_2k(mp_int *a)
{
int ix, iy, iz, iw;
int ix, iy, iw;
mp_digit iz;
if (a->used == 0) {
return 0;
@ -6107,7 +6116,7 @@ int mp_reduce_is_2k(mp_int *a)
return 0;
}
iz <<= 1;
if (iz > (int)MP_MASK) {
if (iz > (mp_digit)MP_MASK) {
++iw;
iz = 1;
}
@ -8396,14 +8405,16 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
CPU /Compiler /MUL CUTOFF/SQR CUTOFF
-------------------------------------------------------------
Intel P4 /GCC v3.2 / 70/ 108
AMD Athlon XP /GCC v3.2 / 109/ 127
Intel P4 Northwood /GCC v3.3.3 / 59/ 81/profiled build
Intel P4 Northwood /GCC v3.3.3 / 59/ 80/profiled_single build
Intel P4 Northwood /ICC v8.0 / 57/ 70/profiled build
Intel P4 Northwood /ICC v8.0 / 54/ 76/profiled_single build
AMD Athlon XP /GCC v3.2 / 109/ 127/
*/
/* configured for an Intel P4 Northwood (ICC v8.0, profiled build) with etc/tune.c */
int KARATSUBA_MUL_CUTOFF = 109, /* Min. number of digits before Karatsuba multiplication is used. */
KARATSUBA_SQR_CUTOFF = 127, /* Min. number of digits before Karatsuba squaring is used. */
int KARATSUBA_MUL_CUTOFF = 57, /* Min. number of digits before Karatsuba multiplication is used. */
KARATSUBA_SQR_CUTOFF = 70, /* Min. number of digits before Karatsuba squaring is used. */
TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */
TOOM_SQR_CUTOFF = 400;

66
pretty.build Normal file
View File

@ -0,0 +1,66 @@
#!/usr/bin/env perl
#
# Cute little builder written in perl
# Total waste of development time...
#
# Builds every object file one at a time (showing a progress bar, a spinner
# and an estimated time remaining) and then runs a final make to produce the
# archive .a file.  Requires GCC, GNU make and a sense of humour.
#
# Tom St Denis
use strict;
use warnings;

my $starttime = time;
my $rate      = 0;

print "Scanning for source files...\n";
my @sources = glob "*.c";
my $count   = scalar @sources;
print "Source files to build: $count\nBuilding...\n";

my @spinner    = ('/', '-', '\\', '|');
my $i          = 0;
my $lines      = 0;
my $filesbuilt = 0;

foreach my $filename (@sources) {
    ++$i;
    printf("Building %3.2f%%, ", ($i / $count) * 100.0);
    print "$spinner[$i % 4], ";

    # estimated time remaining, available once a build rate has been measured
    if ($rate > 0) {
        my $tleft = ($count - $i) / $rate;
        my $tsec  = $tleft % 60;
        my $tmin  = ($tleft / 60) % 60;
        my $thour = int($tleft / 3600);
        printf("%2d:%02d:%02d left, ", $thour, $tmin, $tsec);
    }

    # 30 column progress bar; a partially completed column counts as filled
    my $cnt    = ($i / $count) * 30.0;
    my $filled = int($cnt);
    ++$filled if $filled < $cnt;
    print "[", "#" x $filled, " " x (30 - $filled), "]\r";

    # map foo.c -> foo.o; anchored so only the extension is rewritten
    (my $object = $filename) =~ s/\.c$/.o/;

    # only build (and count) sources whose object file does not exist yet
    if (!-e $object) {
        !system("make $object > /dev/null 2>/dev/null") or die "\nERROR: Failed to make $object!!!\n";
        open(my $src, '<', $filename) or die "Couldn't open $filename for reading: $!";
        ++$lines while (<$src>);
        close $src or die "Error closing $filename after reading: $!";
        ++$filesbuilt;
    }

    # update timer
    if (time != $starttime) {
        my $delay = time - $starttime;
        $rate = $i / $delay;
    }
}

# finish building the library
printf("\nFinished building source (%d seconds, %3.2f files per second).\n", time - $starttime, $rate);
print "Compiled approximately $filesbuilt files and $lines lines of code.\n";
print "Doing final make (building archive...)\n";
!system("make > /dev/null 2>/dev/null") or die "\nERROR: Failed to perform last make command!!!\n";
print "done.\n";

Binary file not shown.

View File

@ -258,7 +258,7 @@ floating point is meant to be implemented in hardware the precision of the manti
a mantissa of much larger precision than hardware alone can efficiently support. This approach could be useful where
scientific applications must minimize the total output error over long calculations.
Another use for large integers is within arithmetic on polynomials of large characteristic (i.e. $GF(p)[x]$ for large $p$).
Yet another use for large integers is within arithmetic on polynomials of large characteristic (i.e. $GF(p)[x]$ for large $p$).
In fact the library discussed within this text has already been used to form a polynomial basis library\footnote{See \url{http://poly.libtomcrypt.org} for more details.}.
\subsection{Benefits of Multiple Precision Arithmetic}
@ -316,7 +316,7 @@ the reader how the algorithms fit together as well as where to start on various
\section{Discussion and Notation}
\subsection{Notation}
A multiple precision integer of $n$-digits shall be denoted as $x = (x_{n-1} ... x_1 x_0)_{ \beta }$ and represent
A multiple precision integer of $n$-digits shall be denoted as $x = (x_{n-1}, \ldots, x_1, x_0)_{ \beta }$ and represent
the integer $x \equiv \sum_{i=0}^{n-1} x_i\beta^i$. The elements of the array $x$ are said to be the radix $\beta$ digits
of the integer. For example, $x = (1,2,3)_{10}$ would represent the integer
$1\cdot 10^2 + 2\cdot10^1 + 3\cdot10^0 = 123$.
@ -339,12 +339,11 @@ algorithms will be used to establish the relevant theory which will subsequently
precision algorithm to solve the same problem.
\subsection{Precision Notation}
For the purposes of this text a single precision variable must be able to represent integers in the range
$0 \le x < q \beta$ while a double precision variable must be able to represent integers in the range
$0 \le x < q \beta^2$. The variable $\beta$ represents the radix of a single digit of a multiple precision integer and
must be of the form $q^p$ for $q, p \in \Z^+$. The extra radix-$q$ factor allows additions and subtractions to proceed
without truncation of the carry. Since all modern computers are binary, it is assumed that $q$ is two, for all intents
and purposes.
The variable $\beta$ represents the radix of a single digit of a multiple precision integer and
must be of the form $q^p$ for $q, p \in \Z^+$. A single precision variable must be able to represent integers in
the range $0 \le x < q \beta$ while a double precision variable must be able to represent integers in the range
$0 \le x < q \beta^2$. The extra radix-$q$ factor allows additions and subtractions to proceed without truncation of the
carry. Since all modern computers are binary, it is assumed that $q$ is two.
\index{mp\_digit} \index{mp\_word}
Within the source code that will be presented for each algorithm, the data type \textbf{mp\_digit} will represent
@ -376,7 +375,7 @@ the $/$ division symbol is used the intention is to perform an integer division
$5/2 = 2$ which will often be written as $\lfloor 5/2 \rfloor = 2$ for clarity. When an expression is written as a
fraction a real value division is implied, for example ${5 \over 2} = 2.5$.
The norm of a multiple precision integer, for example, $\vert \vert x \vert \vert$ will be used to represent the number of digits in the representation
The norm of a multiple precision integer, for example $\vert \vert x \vert \vert$, will be used to represent the number of digits in the representation
of the integer. For example, $\vert \vert 123 \vert \vert = 3$ and $\vert \vert 79452 \vert \vert = 5$.
\subsection{Work Effort}
@ -569,7 +568,7 @@ By building outwards from a base foundation instead of using a parallel design m
highly modular. Being highly modular is a desirable property of any project as it often means the resulting product
has a small footprint and updates are easy to perform.
Usually when I start a project I will begin with the header file. I define the data types I think I will need and
Usually when I start a project I will begin with the header files. I define the data types I think I will need and
prototype the initial functions that are not dependent on other functions (within the library). After I
implement these base functions I prototype more dependent functions and implement them. The process repeats until
I implement all of the functions I require. For example, in the case of LibTomMath I implemented functions such as
@ -619,14 +618,26 @@ any such data type but it does provide for making composite data types known as
used within LibTomMath.
\index{mp\_int}
\begin{verbatim}
typedef struct {
int used, alloc, sign;
mp_digit *dp;
} mp_int;
\end{verbatim}
\begin{figure}[here]
\begin{center}
\begin{small}
%\begin{verbatim}
\begin{tabular}{|l|}
\hline
typedef struct \{ \\
\hspace{3mm}int used, alloc, sign;\\
\hspace{3mm}mp\_digit *dp;\\
\} \textbf{mp\_int}; \\
\hline
\end{tabular}
%\end{verbatim}
\end{small}
\caption{The mp\_int Structure}
\label{fig:mpint}
\end{center}
\end{figure}
The mp\_int structure can be broken down as follows.
The mp\_int structure (fig. \ref{fig:mpint}) can be broken down as follows.
\begin{enumerate}
\item The \textbf{used} parameter denotes how many digits of the array \textbf{dp} contain the digits used to represent
@ -701,9 +712,10 @@ fault by dereferencing memory not owned by the application.
In the case of LibTomMath the only errors that are checked for are related to inappropriate inputs (division by zero for
instance) and memory allocation errors. It will not check that the mp\_int passed to any function is valid nor
will it check pointers for validity. Any function that can cause a runtime error will return an error code as an
\textbf{int} data type with one of the following values.
\textbf{int} data type with one of the following values (fig. \ref{fig:errcodes}).
\index{MP\_OKAY} \index{MP\_VAL} \index{MP\_MEM}
\begin{figure}[here]
\begin{center}
\begin{tabular}{|l|l|}
\hline \textbf{Value} & \textbf{Meaning} \\
@ -713,6 +725,9 @@ will it check pointers for validity. Any function that can cause a runtime erro
\hline
\end{tabular}
\end{center}
\caption{LibTomMath Error Codes}
\label{fig:errcodes}
\end{figure}
When an error is detected within a function it should free any memory it allocated, often during the initialization of
temporary mp\_ints, and return as soon as possible. The goal is to leave the system in the same state it was when the
@ -748,6 +763,7 @@ to zero. The \textbf{used} count set to zero and \textbf{sign} set to \textbf{M
An mp\_int is said to be initialized if it is set to a valid, preferably default, state such that all of the members of the
structure are set to valid values. The mp\_init algorithm will perform such an action.
\index{mp\_init}
\begin{figure}[here]
\begin{center}
\begin{tabular}{l}
@ -770,17 +786,23 @@ structure are set to valid values. The mp\_init algorithm will perform such an
\end{figure}
\textbf{Algorithm mp\_init.}
The \textbf{MP\_PREC} name represents a constant\footnote{Defined in the ``tommath.h'' header file within LibTomMath.}
used to dictate the minimum precision of allocated mp\_int integers. Ideally, it is at least equal to $32$ since for most
purposes that will be more than enough.
The purpose of this function is to initialize an mp\_int structure so that the rest of the library can properly
manipulate it. It is assumed that the input may not have had any of its members previously initialized which is certainly
a valid assumption if the input resides on the stack.
Memory for the default number of digits is allocated first. If the allocation fails the algorithm returns immediately
with the \textbf{MP\_MEM} error code. If the allocation succeeds the remaining members of the mp\_int structure
must be initialized to reflect the default initial state.
Before any of the members such as \textbf{sign}, \textbf{used} or \textbf{alloc} are initialized the memory for
the digits is allocated. If this fails the function returns before setting any of the other members. The \textbf{MP\_PREC}
name represents a constant\footnote{Defined in the ``tommath.h'' header file within LibTomMath.}
used to dictate the minimum precision of newly initialized mp\_int integers. Ideally, it is at least equal to the smallest
precision number you'll be working with.
The allocated digits are all set to zero (step three) to ensure they are in a known state. The \textbf{sign}, \textbf{used}
and \textbf{alloc} are subsequently initialized to represent the zero integer. By step seven the algorithm returns a success
code and the mp\_int $a$ has been successfully initialized to a valid state representing the integer zero.
Allocating a block of digits at first instead of a single digit has the benefit of lowering the number of usually slow
heap operations later functions will have to perform in the future. If \textbf{MP\_PREC} is set correctly the slack
memory and the number of heap operations will be trivial.
Once the allocation has been made the digits have to be set to zero as well as the \textbf{used}, \textbf{sign} and
\textbf{alloc} members initialized. This ensures that the mp\_int will always represent the default state of zero regardless
of the original condition of the input.
\textbf{Remark.}
This function introduces the idiosyncrasy that all iterative loops, commonly initiated with the ``for'' keyword, iterate incrementally
@ -796,19 +818,21 @@ One immediate observation of this initializtion function is that it does not ret
is assumed that the caller has already allocated memory for the mp\_int structure, typically on the application stack. The
call to mp\_init() is used only to initialize the members of the structure to a known default state.
Before any of the other members of the structure are initialized memory from the application heap is allocated with
the calloc() function (line @22,calloc@). The size of the allocated memory is large enough to hold \textbf{MP\_PREC}
mp\_digit variables. The calloc() function is used instead\footnote{calloc() will allocate memory in the same
manner as malloc() except that it also sets the contents to zero upon successfully allocating the memory.} of malloc()
since digits have to be set to zero for the function to finish correctly. The \textbf{OPT\_CAST} token is a macro
definition which will turn into a cast from void * to mp\_digit * for C++ compilers. It is not required for C compilers.
Here we see (line @23,XMALLOC@) the memory allocation is performed first. This allows us to exit cleanly and quickly
if there is an error. If the allocation fails the routine will return \textbf{MP\_MEM} to the caller to indicate there
was a memory error. The function XMALLOC is what actually allocates the memory. Technically XMALLOC is not a function
but a macro defined in ``tommath.h''. By default, XMALLOC will evaluate to malloc() which is the C library's built--in
memory allocation routine.
After the memory has been successfully allocated the remainder of the members are initialized
In order to assure the mp\_int is in a known state the digits must be set to zero. On most platforms this could have been
accomplished by using calloc() instead of malloc(). However, to correctly initialize an integer type to a given value in a
portable fashion you have to actually assign the value. The for loop (line @28,for@) performs this required
operation.
After the memory has been successfully initialized the remainder of the members are initialized
(lines @29,used@ through @31,sign@) to their respective default states. At this point the algorithm has succeeded and
a success code is returned to the calling function.
If this function returns \textbf{MP\_OKAY} it is safe to assume the mp\_int structure has been properly initialized and
is safe to use with other functions within the library.
a success code is returned to the calling function. If this function returns \textbf{MP\_OKAY} it is safe to assume the
mp\_int structure has been properly initialized and is safe to use with other functions within the library.
\subsection{Clearing an mp\_int}
When an mp\_int is no longer required by the application, the memory that has been allocated for its digits must be
@ -819,7 +843,7 @@ returned to the application's memory pool with the mp\_clear algorithm.
\begin{tabular}{l}
\hline Algorithm \textbf{mp\_clear}. \\
\textbf{Input}. An mp\_int $a$ \\
\textbf{Output}. The memory for $a$ is freed for reuse. \\
\textbf{Output}. The memory for $a$ shall be deallocated. \\
\hline \\
1. If $a$ has been previously freed then return(\textit{MP\_OKAY}). \\
2. for $n$ from 0 to $a.used - 1$ do \\
@ -836,32 +860,31 @@ returned to the application's memory pool with the mp\_clear algorithm.
\end{figure}
\textbf{Algorithm mp\_clear.}
This algorithm releases the memory allocated for an mp\_int back into the memory pool for reuse. It is designed
such that a given mp\_int structure can be cleared multiple times between initializations without attempting to
free the memory twice\footnote{In ISO C for example, calling free() twice on the same memory block causes undefinied
behaviour.}.
This algorithm accomplishes two goals. First, it clears the digits and the other mp\_int members. This ensures that
if a developer accidentally re-uses a cleared structure it is less likely to cause problems. The second goal
is to free the allocated memory.
The first step determines if the mp\_int structure has been marked as free already. If it has, the algorithm returns
success immediately as no further actions are required. Otherwise, the algorithm will proceed to put the structure
in a known empty and otherwise invalid state. First the digits of the mp\_int are set to zero. The memory that has been allocated for the
digits is then freed. The \textbf{used} and \textbf{alloc} counts are both set to zero and the \textbf{sign} set to
\textbf{MP\_ZPOS}. This known fixed state for cleared mp\_int structures will make debuging easier for the end
developer. That is, if they spot (via their debugger) an mp\_int they are using that is in this state it will be
obvious that they erroneously and prematurely cleared the mp\_int structure.
The logic behind the algorithm is extended by marking cleared mp\_int structures so that subsequent calls to this
algorithm will not try to free the memory multiple times. Cleared mp\_ints are detectable by having a pre-defined invalid
digit pointer \textbf{dp} setting.
Note that once an mp\_int has been cleared the mp\_int structure is no longer in a valid state for any other algorithm
Once an mp\_int has been cleared the mp\_int structure is no longer in a valid state for any other algorithm
with the exception of algorithms mp\_init, mp\_init\_copy, mp\_init\_size and mp\_clear.
EXAM,bn_mp_clear.c
The ``if'' statement (line @21,a->dp != NULL@) prevents the heap from being corrupted if a user double-frees an
mp\_int. This is because once the memory is freed the pointer is set to \textbf{NULL} (line @30,NULL@).
The algorithm only operates on the mp\_int if it hasn't been previously cleared. The if statement (line @23,a->dp != NULL@)
checks to see if the \textbf{dp} member is not \textbf{NULL}. If the mp\_int is a valid mp\_int then \textbf{dp} cannot be
\textbf{NULL} in which case the if statement will evaluate to true.
Without the check, code that accidentally calls mp\_clear twice for a given mp\_int structure would try to free the memory
allocated for the digits twice. This may cause some C libraries to signal a fault. By setting the pointer to
\textbf{NULL} it helps debug code that may inadvertently free the mp\_int before it is truly not needed, because attempts
to reference digits should fail immediately. The allocated digits are set to zero before being freed (line @24,memset@).
This is ideal for cryptographic situations where the integer that the mp\_int represents might need to be kept a secret.
The digits of the mp\_int are cleared by the for loop (line @25,for@) which assigns a zero to every digit. Similar to mp\_init()
the digits are assigned zero instead of using block memory operations (such as memset()) since this is more portable.
The digits are deallocated off the heap via the XFREE macro. Similar to XMALLOC the XFREE macro actually evaluates to
a standard C library function. In this case the free() function. Since free() only deallocates the memory the pointer
still has to be reset to \textbf{NULL} manually (line @33,NULL@).
Now that the digits have been cleared and deallocated the other members are set to their final values (lines @34,= 0@ and @35,ZPOS@).
\section{Maintenance Algorithms}
@ -889,7 +912,7 @@ must be re-sized appropriately to accomodate the result. The mp\_grow algorithm
1. if $a.alloc \ge b$ then return(\textit{MP\_OKAY}) \\
2. $u \leftarrow b\mbox{ (mod }MP\_PREC\mbox{)}$ \\
3. $v \leftarrow b + 2 \cdot MP\_PREC - u$ \\
4. Re-Allocate the array of digits $a$ to size $v$ \\
4. Re-allocate the array of digits $a$ to size $v$ \\
5. If the allocation failed then return(\textit{MP\_MEM}). \\
6. for n from a.alloc to $v - 1$ do \\
\hspace{+3mm}6.1 $a_n \leftarrow 0$ \\
@ -914,15 +937,19 @@ assumed to contain undefined values they are initially set to zero.
EXAM,bn_mp_grow.c
The first step is to see if we actually need to perform a re-allocation at all (line @24,a->alloc < size@). If a reallocation
must occur the digit count is padded upwards to help prevent many trivial reallocations (line @28,size@). Next the reallocation is performed
and the return of realloc() is stored in a temporary pointer named $tmp$ (line @36,realloc@). The return is stored in a temporary
instead of $a.dp$ to prevent the code from losing the original pointer in case the reallocation fails. Had the return been stored
in $a.dp$ instead there would be no way to reclaim the heap originally used.
A quick optimization is to first determine if a memory re-allocation is required at all. The if statement (line @23,if@) checks
if the \textbf{alloc} member of the mp\_int is smaller than the requested digit count. If the count is not larger than \textbf{alloc}
the function skips the re-allocation part thus saving time.
If the reallocation fails the function will return \textbf{MP\_MEM} (line @39,return@), otherwise, the value of $tmp$ is assigned
to the pointer $a.dp$ and the function continues. A simple for loop from line @48,a->alloc@ to line @50,}@ will zero all digits
that were above the old \textbf{alloc} limit to make sure the integer is in a known state.
When a re-allocation is performed it is turned into an optimal request to save time in the future. The requested digit count is
padded upwards to the 2nd multiple of \textbf{MP\_PREC} larger than \textbf{alloc} (line @25, size@). The XREALLOC function is used
to re-allocate the memory. As per the other functions XREALLOC is actually a macro which evaluates to realloc by default. The realloc
function leaves the base of the allocation intact which means the first \textbf{alloc} digits of the mp\_int are the same as before
the re-allocation. All that is left is to clear the newly allocated digits and return.
Note that the re-allocation result is actually stored in a temporary pointer $tmp$. This is to allow this function to return
an error with a valid pointer. Earlier releases of the library stored the result of XREALLOC into the mp\_int $a$. That would
result in a memory leak if XREALLOC ever failed.
\subsection{Initializing Variable Precision mp\_ints}
Occasionally the number of digits required will be known in advance of an initialization, based on, for example, the size
@ -970,7 +997,7 @@ The number of digits $b$ requested is padded (line @22,MP_PREC@) by first augmen
mp\_int is placed in a default state representing the integer zero. Otherwise, the error code \textbf{MP\_MEM} will be
returned (line @27,return@).
The digits are allocated and set to zero at the same time with the calloc() function (line @25,calloc@). The
The digits are allocated and set to zero at the same time with the calloc() function (line @25,XCALLOC@). The
\textbf{used} count is set to zero, the \textbf{alloc} count set to the padded digit count and the \textbf{sign} flag set
to \textbf{MP\_ZPOS} to achieve a default valid mp\_int state (lines @29,used@, @30,alloc@ and @31,sign@). If the function
returns successfully then it is correct to assume that the mp\_int structure is in a valid state for the remainder of the

File diff suppressed because it is too large Load Diff