added libtommath-0.31
2
bn.tex
@ -49,7 +49,7 @@
|
||||
\begin{document}
|
||||
\frontmatter
|
||||
\pagestyle{empty}
|
||||
\title{LibTomMath User Manual \\ v0.30}
|
||||
\title{LibTomMath User Manual \\ v0.31}
|
||||
\author{Tom St Denis \\ tomstdenis@iahu.ca}
|
||||
\maketitle
|
||||
This text, the library and the accompanying textbook are all hereby placed in the public domain. This book has been
|
||||
|
@ -88,7 +88,7 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
|
||||
}
|
||||
|
||||
/* setup dest */
|
||||
olduse = c->used;
|
||||
olduse = c->used;
|
||||
c->used = digs;
|
||||
|
||||
{
|
||||
|
@ -36,7 +36,7 @@ mp_2expt (mp_int * a, int b)
|
||||
a->used = b / DIGIT_BIT + 1;
|
||||
|
||||
/* put the single bit in its place */
|
||||
a->dp[b / DIGIT_BIT] = 1 << (b % DIGIT_BIT);
|
||||
a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
|
||||
|
||||
return MP_OKAY;
|
||||
}
|
||||
|
@ -18,10 +18,14 @@
|
||||
void
|
||||
mp_clear (mp_int * a)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* only do anything if a hasn't been freed previously */
|
||||
if (a->dp != NULL) {
|
||||
/* first zero the digits */
|
||||
memset (a->dp, 0, sizeof (mp_digit) * a->used);
|
||||
for (i = 0; i < a->used; i++) {
|
||||
a->dp[i] = 0;
|
||||
}
|
||||
|
||||
/* free ram */
|
||||
XFREE(a->dp);
|
||||
|
@ -187,7 +187,7 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
|
||||
*/
|
||||
|
||||
/* get sign before writing to c */
|
||||
x.sign = a->sign;
|
||||
x.sign = x.used == 0 ? MP_ZPOS : a->sign;
|
||||
|
||||
if (c != NULL) {
|
||||
mp_clamp (&q);
|
||||
|
11
bn_mp_init.c
@ -14,15 +14,22 @@
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* init a new bigint */
|
||||
/* init a new mp_int */
|
||||
int mp_init (mp_int * a)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* allocate memory required and clear it */
|
||||
a->dp = OPT_CAST(mp_digit) XCALLOC (sizeof (mp_digit), MP_PREC);
|
||||
a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
|
||||
if (a->dp == NULL) {
|
||||
return MP_MEM;
|
||||
}
|
||||
|
||||
/* set the digits to zero */
|
||||
for (i = 0; i < MP_PREC; i++) {
|
||||
a->dp[i] = 0;
|
||||
}
|
||||
|
||||
/* set the used to zero, allocated digits to the default precision
|
||||
* and sign to positive */
|
||||
a->used = 0;
|
||||
|
@ -76,9 +76,6 @@ int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
|
||||
goto X0Y0;
|
||||
|
||||
/* now shift the digits */
|
||||
x0.sign = x1.sign = a->sign;
|
||||
y0.sign = y1.sign = b->sign;
|
||||
|
||||
x0.used = y0.used = B;
|
||||
x1.used = a->used - B;
|
||||
y1.used = b->used - B;
|
||||
|
@ -43,6 +43,6 @@ int mp_mul (mp_int * a, mp_int * b, mp_int * c)
|
||||
res = s_mp_mul (a, b, c);
|
||||
}
|
||||
}
|
||||
c->sign = neg;
|
||||
c->sign = (c->used > 0) ? neg : MP_ZPOS;
|
||||
return res;
|
||||
}
|
||||
|
@ -17,7 +17,8 @@
|
||||
/* determines if mp_reduce_2k can be used */
|
||||
int mp_reduce_is_2k(mp_int *a)
|
||||
{
|
||||
int ix, iy, iz, iw;
|
||||
int ix, iy, iw;
|
||||
mp_digit iz;
|
||||
|
||||
if (a->used == 0) {
|
||||
return 0;
|
||||
@ -34,7 +35,7 @@ int mp_reduce_is_2k(mp_int *a)
|
||||
return 0;
|
||||
}
|
||||
iz <<= 1;
|
||||
if (iz > (int)MP_MASK) {
|
||||
if (iz > (mp_digit)MP_MASK) {
|
||||
++iw;
|
||||
iz = 1;
|
||||
}
|
||||
|
14
bncore.c
@ -18,14 +18,16 @@
|
||||
|
||||
CPU /Compiler /MUL CUTOFF/SQR CUTOFF
|
||||
-------------------------------------------------------------
|
||||
Intel P4 /GCC v3.2 / 70/ 108
|
||||
AMD Athlon XP /GCC v3.2 / 109/ 127
|
||||
|
||||
Intel P4 Northwood /GCC v3.3.3 / 59/ 81/profiled build
|
||||
Intel P4 Northwood /GCC v3.3.3 / 59/ 80/profiled_single build
|
||||
Intel P4 Northwood /ICC v8.0 / 57/ 70/profiled build
|
||||
Intel P4 Northwood /ICC v8.0 / 54/ 76/profiled_single build
|
||||
AMD Athlon XP /GCC v3.2 / 109/ 127/
|
||||
|
||||
*/
|
||||
|
||||
/* configured with etc/tune.c; the current cutoffs (57/70) match the "Intel P4 Northwood / ICC v8.0 / profiled build" row of the table above */
|
||||
int KARATSUBA_MUL_CUTOFF = 109, /* Min. number of digits before Karatsuba multiplication is used. */
|
||||
KARATSUBA_SQR_CUTOFF = 127, /* Min. number of digits before Karatsuba squaring is used. */
|
||||
int KARATSUBA_MUL_CUTOFF = 57, /* Min. number of digits before Karatsuba multiplication is used. */
|
||||
KARATSUBA_SQR_CUTOFF = 70, /* Min. number of digits before Karatsuba squaring is used. */
|
||||
|
||||
TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */
|
||||
TOOM_SQR_CUTOFF = 400;
|
||||
|
@ -84,6 +84,7 @@ while (<IN>) {
|
||||
$text[$line++] = $_;
|
||||
last if ($_ =~ /tommath\.h/);
|
||||
}
|
||||
<SRC>;
|
||||
}
|
||||
|
||||
$inline = 0;
|
||||
|
@ -1,3 +1,12 @@
|
||||
August 9th, 2004
|
||||
v0.31 -- "profiled" builds now :-) new timings for Intel Northwoods
|
||||
-- Added "pretty" build target
|
||||
-- Update mp_init() to actually assign 0's instead of relying on calloc()
|
||||
-- "Wolfgang Ehrhardt" <Wolfgang.Ehrhardt@munich.netsurf.de> found a bug in mp_mul() where if
|
||||
you multiply a negative by zero you get negative zero as the result. Oops.
|
||||
-- J Harper from PeerSec let me toy with his AMD64 and I got 60-bit digits working properly
|
||||
[this also means that I fixed a bug that showed up whenever sizeof(int) < sizeof(mp_digit)]
|
||||
|
||||
April 11th, 2004
|
||||
v0.30 -- Added "mp_toradix_n" which stores up to "n-1" least significant digits of an mp_int
|
||||
-- Johan Lindh sent a patch so MSVC wouldn't whine about redefining malloc [in weird dll modes]
|
||||
|
296
demo/demo.c
@ -1,7 +1,5 @@
|
||||
#include <time.h>
|
||||
|
||||
#define TESTING
|
||||
|
||||
#ifdef IOWNANATHLON
|
||||
#include <unistd.h>
|
||||
#define SLEEP sleep(4)
|
||||
@ -11,49 +9,6 @@
|
||||
|
||||
#include "tommath.h"
|
||||
|
||||
#ifdef TIMER
|
||||
ulong64 _tt;
|
||||
|
||||
#if defined(__i386__) || defined(_M_IX86) || defined(_M_AMD64)
|
||||
/* RDTSC from Scott Duplichan */
|
||||
static ulong64 TIMFUNC (void)
|
||||
{
|
||||
#if defined __GNUC__
|
||||
#ifdef __i386__
|
||||
ulong64 a;
|
||||
__asm__ __volatile__ ("rdtsc ":"=A" (a));
|
||||
return a;
|
||||
#else /* gcc-IA64 version */
|
||||
unsigned long result;
|
||||
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
|
||||
while (__builtin_expect ((int) result == -1, 0))
|
||||
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
|
||||
return result;
|
||||
#endif
|
||||
|
||||
// Microsoft and Intel Windows compilers
|
||||
#elif defined _M_IX86
|
||||
__asm rdtsc
|
||||
#elif defined _M_AMD64
|
||||
return __rdtsc ();
|
||||
#elif defined _M_IA64
|
||||
#if defined __INTEL_COMPILER
|
||||
#include <ia64intrin.h>
|
||||
#endif
|
||||
return __getReg (3116);
|
||||
#else
|
||||
#error need rdtsc function for this build
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
#define TIMFUNC clock
|
||||
#endif
|
||||
|
||||
ulong64 rdtsc(void) { return TIMFUNC() - _tt; }
|
||||
void reset(void) { _tt = TIMFUNC(); }
|
||||
|
||||
#endif
|
||||
|
||||
void ndraw(mp_int *a, char *name)
|
||||
{
|
||||
char buf[4096];
|
||||
@ -89,10 +44,6 @@ int myrng(unsigned char *dst, int len, void *dat)
|
||||
}
|
||||
|
||||
|
||||
#define DO2(x) x; x;
|
||||
#define DO4(x) DO2(x); DO2(x);
|
||||
#define DO8(x) DO4(x); DO4(x);
|
||||
#define DO(x) DO8(x); DO8(x);
|
||||
|
||||
char cmd[4096], buf[4096];
|
||||
int main(void)
|
||||
@ -103,10 +54,6 @@ int main(void)
|
||||
unsigned rr;
|
||||
int i, n, err, cnt, ix, old_kara_m, old_kara_s;
|
||||
|
||||
#ifdef TIMER
|
||||
ulong64 tt, CLK_PER_SEC;
|
||||
FILE *log, *logb, *logc;
|
||||
#endif
|
||||
|
||||
mp_init(&a);
|
||||
mp_init(&b);
|
||||
@ -117,11 +64,10 @@ int main(void)
|
||||
|
||||
srand(time(NULL));
|
||||
|
||||
#ifdef TESTING
|
||||
// test mp_get_int
|
||||
printf("Testing: mp_get_int\n");
|
||||
for(i=0;i<1000;++i) {
|
||||
t = (unsigned long)rand()*rand()+1;
|
||||
t = ((unsigned long)rand()*rand()+1)&0xFFFFFFFF;
|
||||
mp_set_int(&a,t);
|
||||
if (t!=mp_get_int(&a)) {
|
||||
printf("mp_get_int() bad result!\n");
|
||||
@ -141,7 +87,7 @@ int main(void)
|
||||
|
||||
// test mp_sqrt
|
||||
printf("Testing: mp_sqrt\n");
|
||||
for (i=0;i<10000;++i) {
|
||||
for (i=0;i<1000;++i) {
|
||||
printf("%6d\r", i); fflush(stdout);
|
||||
n = (rand()&15)+1;
|
||||
mp_rand(&a,n);
|
||||
@ -157,7 +103,7 @@ int main(void)
|
||||
}
|
||||
|
||||
printf("\nTesting: mp_is_square\n");
|
||||
for (i=0;i<100000;++i) {
|
||||
for (i=0;i<1000;++i) {
|
||||
printf("%6d\r", i); fflush(stdout);
|
||||
|
||||
/* test mp_is_square false negatives */
|
||||
@ -186,11 +132,9 @@ int main(void)
|
||||
|
||||
}
|
||||
printf("\n\n");
|
||||
#endif
|
||||
|
||||
#ifdef TESTING
|
||||
/* test for size */
|
||||
for (ix = 16; ix < 512; ix++) {
|
||||
for (ix = 10; ix < 256; ix++) {
|
||||
printf("Testing (not safe-prime): %9d bits \r", ix); fflush(stdout);
|
||||
err = mp_prime_random_ex(&a, 8, ix, (rand()&1)?LTM_PRIME_2MSB_OFF:LTM_PRIME_2MSB_ON, myrng, NULL);
|
||||
if (err != MP_OKAY) {
|
||||
@ -203,7 +147,7 @@ int main(void)
|
||||
}
|
||||
}
|
||||
|
||||
for (ix = 16; ix < 512; ix++) {
|
||||
for (ix = 16; ix < 256; ix++) {
|
||||
printf("Testing ( safe-prime): %9d bits \r", ix); fflush(stdout);
|
||||
err = mp_prime_random_ex(&a, 8, ix, ((rand()&1)?LTM_PRIME_2MSB_OFF:LTM_PRIME_2MSB_ON)|LTM_PRIME_SAFE, myrng, NULL);
|
||||
if (err != MP_OKAY) {
|
||||
@ -225,9 +169,7 @@ int main(void)
|
||||
}
|
||||
|
||||
printf("\n\n");
|
||||
#endif
|
||||
|
||||
#ifdef TESTING
|
||||
mp_read_radix(&a, "123456", 10);
|
||||
mp_toradix_n(&a, buf, 10, 3);
|
||||
printf("a == %s\n", buf);
|
||||
@ -235,7 +177,6 @@ int main(void)
|
||||
printf("a == %s\n", buf);
|
||||
mp_toradix_n(&a, buf, 10, 30);
|
||||
printf("a == %s\n", buf);
|
||||
#endif
|
||||
|
||||
|
||||
#if 0
|
||||
@ -248,22 +189,6 @@ int main(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
{
|
||||
mp_word aa, bb;
|
||||
|
||||
for (;;) {
|
||||
aa = abs(rand()) & MP_MASK;
|
||||
bb = abs(rand()) & MP_MASK;
|
||||
if (MULT(aa,bb) != (aa*bb)) {
|
||||
printf("%llu * %llu == %llu or %llu?\n", aa, bb, (ulong64)MULT(aa,bb), (ulong64)(aa*bb));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef TESTING
|
||||
/* test mp_cnt_lsb */
|
||||
printf("testing mp_cnt_lsb...\n");
|
||||
mp_set(&a, 1);
|
||||
@ -274,12 +199,10 @@ int main(void)
|
||||
}
|
||||
mp_mul_2(&a, &a);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* test mp_reduce_2k */
|
||||
#ifdef TESTING
|
||||
printf("Testing mp_reduce_2k...\n");
|
||||
for (cnt = 3; cnt <= 384; ++cnt) {
|
||||
for (cnt = 3; cnt <= 128; ++cnt) {
|
||||
mp_digit tmp;
|
||||
mp_2expt(&a, cnt);
|
||||
mp_sub_d(&a, 2, &a); /* a = 2**cnt - 2 */
|
||||
@ -289,7 +212,7 @@ int main(void)
|
||||
printf("(%d)", mp_reduce_is_2k(&a));
|
||||
mp_reduce_2k_setup(&a, &tmp);
|
||||
printf("(%d)", tmp);
|
||||
for (ix = 0; ix < 10000; ix++) {
|
||||
for (ix = 0; ix < 1000; ix++) {
|
||||
if (!(ix & 127)) {printf("."); fflush(stdout); }
|
||||
mp_rand(&b, (cnt/DIGIT_BIT + 1) * 2);
|
||||
mp_copy(&c, &b);
|
||||
@ -301,14 +224,11 @@ int main(void)
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* test mp_div_3 */
|
||||
#ifdef TESTING
|
||||
printf("Testing mp_div_3...\n");
|
||||
mp_set(&d, 3);
|
||||
for (cnt = 0; cnt < 1000000; ) {
|
||||
for (cnt = 0; cnt < 10000; ) {
|
||||
mp_digit r1, r2;
|
||||
|
||||
if (!(++cnt & 127)) printf("%9d\r", cnt);
|
||||
@ -321,12 +241,10 @@ int main(void)
|
||||
}
|
||||
}
|
||||
printf("\n\nPassed div_3 testing\n");
|
||||
#endif
|
||||
|
||||
/* test the DR reduction */
|
||||
#ifdef TESTING
|
||||
printf("testing mp_dr_reduce...\n");
|
||||
for (cnt = 2; cnt < 128; cnt++) {
|
||||
for (cnt = 2; cnt < 32; cnt++) {
|
||||
printf("%d digit modulus\n", cnt);
|
||||
mp_grow(&a, cnt);
|
||||
mp_zero(&a);
|
||||
@ -334,7 +252,7 @@ int main(void)
|
||||
a.dp[ix] = MP_MASK;
|
||||
}
|
||||
a.used = cnt;
|
||||
mp_prime_next_prime(&a, 3, 0);
|
||||
a.dp[0] = 3;
|
||||
|
||||
mp_rand(&b, cnt - 1);
|
||||
mp_copy(&b, &c);
|
||||
@ -346,206 +264,16 @@ int main(void)
|
||||
mp_copy(&b, &c);
|
||||
|
||||
mp_mod(&b, &a, &b);
|
||||
mp_dr_reduce(&c, &a, (1<<DIGIT_BIT)-a.dp[0]);
|
||||
mp_dr_reduce(&c, &a, (((mp_digit)1)<<DIGIT_BIT)-a.dp[0]);
|
||||
|
||||
if (mp_cmp(&b, &c) != MP_EQ) {
|
||||
printf("Failed on trial %lu\n", rr); exit(-1);
|
||||
|
||||
}
|
||||
} while (++rr < 100000);
|
||||
} while (++rr < 500);
|
||||
printf("Passed DR test for %d digits\n", cnt);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef TIMER
|
||||
/* temp. turn off TOOM */
|
||||
TOOM_MUL_CUTOFF = TOOM_SQR_CUTOFF = 100000;
|
||||
|
||||
reset();
|
||||
sleep(1);
|
||||
CLK_PER_SEC = rdtsc();
|
||||
|
||||
printf("CLK_PER_SEC == %lu\n", CLK_PER_SEC);
|
||||
|
||||
|
||||
log = fopen("logs/add.log", "w");
|
||||
for (cnt = 8; cnt <= 128; cnt += 8) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
mp_rand(&b, cnt);
|
||||
reset();
|
||||
rr = 0;
|
||||
do {
|
||||
DO(mp_add(&a,&b,&c));
|
||||
rr += 16;
|
||||
} while (rdtsc() < (CLK_PER_SEC * 2));
|
||||
tt = rdtsc();
|
||||
printf("Adding\t\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
|
||||
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log);
|
||||
}
|
||||
fclose(log);
|
||||
|
||||
log = fopen("logs/sub.log", "w");
|
||||
for (cnt = 8; cnt <= 128; cnt += 8) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
mp_rand(&b, cnt);
|
||||
reset();
|
||||
rr = 0;
|
||||
do {
|
||||
DO(mp_sub(&a,&b,&c));
|
||||
rr += 16;
|
||||
} while (rdtsc() < (CLK_PER_SEC * 2));
|
||||
tt = rdtsc();
|
||||
printf("Subtracting\t\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
|
||||
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log);
|
||||
}
|
||||
fclose(log);
|
||||
|
||||
/* do mult/square twice, first without karatsuba and second with */
|
||||
mult_test:
|
||||
old_kara_m = KARATSUBA_MUL_CUTOFF;
|
||||
old_kara_s = KARATSUBA_SQR_CUTOFF;
|
||||
for (ix = 0; ix < 2; ix++) {
|
||||
printf("With%s Karatsuba\n", (ix==0)?"out":"");
|
||||
|
||||
KARATSUBA_MUL_CUTOFF = (ix==0)?9999:old_kara_m;
|
||||
KARATSUBA_SQR_CUTOFF = (ix==0)?9999:old_kara_s;
|
||||
|
||||
log = fopen((ix==0)?"logs/mult.log":"logs/mult_kara.log", "w");
|
||||
for (cnt = 32; cnt <= 288; cnt += 8) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
mp_rand(&b, cnt);
|
||||
reset();
|
||||
rr = 0;
|
||||
do {
|
||||
DO(mp_mul(&a, &b, &c));
|
||||
rr += 16;
|
||||
} while (rdtsc() < (CLK_PER_SEC * 2));
|
||||
tt = rdtsc();
|
||||
printf("Multiplying\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
|
||||
fprintf(log, "%d %9llu\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log);
|
||||
}
|
||||
fclose(log);
|
||||
|
||||
log = fopen((ix==0)?"logs/sqr.log":"logs/sqr_kara.log", "w");
|
||||
for (cnt = 32; cnt <= 288; cnt += 8) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
reset();
|
||||
rr = 0;
|
||||
do {
|
||||
DO(mp_sqr(&a, &b));
|
||||
rr += 16;
|
||||
} while (rdtsc() < (CLK_PER_SEC * 2));
|
||||
tt = rdtsc();
|
||||
printf("Squaring\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
|
||||
fprintf(log, "%d %9llu\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt); fflush(log);
|
||||
}
|
||||
fclose(log);
|
||||
|
||||
}
|
||||
expt_test:
|
||||
{
|
||||
char *primes[] = {
|
||||
/* 2K moduli mersenne primes */
|
||||
"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151",
|
||||
"531137992816767098689588206552468627329593117727031923199444138200403559860852242739162502265229285668889329486246501015346579337652707239409519978766587351943831270835393219031728127",
|
||||
"10407932194664399081925240327364085538615262247266704805319112350403608059673360298012239441732324184842421613954281007791383566248323464908139906605677320762924129509389220345773183349661583550472959420547689811211693677147548478866962501384438260291732348885311160828538416585028255604666224831890918801847068222203140521026698435488732958028878050869736186900714720710555703168729087",
|
||||
"1475979915214180235084898622737381736312066145333169775147771216478570297878078949377407337049389289382748507531496480477281264838760259191814463365330269540496961201113430156902396093989090226259326935025281409614983499388222831448598601834318536230923772641390209490231836446899608210795482963763094236630945410832793769905399982457186322944729636418890623372171723742105636440368218459649632948538696905872650486914434637457507280441823676813517852099348660847172579408422316678097670224011990280170474894487426924742108823536808485072502240519452587542875349976558572670229633962575212637477897785501552646522609988869914013540483809865681250419497686697771007",
|
||||
"259117086013202627776246767922441530941818887553125427303974923161874019266586362086201209516800483406550695241733194177441689509238807017410377709597512042313066624082916353517952311186154862265604547691127595848775610568757931191017711408826252153849035830401185072116424747461823031471398340229288074545677907941037288235820705892351068433882986888616658650280927692080339605869308790500409503709875902119018371991620994002568935113136548829739112656797303241986517250116412703509705427773477972349821676443446668383119322540099648994051790241624056519054483690809616061625743042361721863339415852426431208737266591962061753535748892894599629195183082621860853400937932839420261866586142503251450773096274235376822938649407127700846077124211823080804139298087057504713825264571448379371125032081826126566649084251699453951887789613650248405739378594599444335231188280123660406262468609212150349937584782292237144339628858485938215738821232393687046160677362909315071",
|
||||
"190797007524439073807468042969529173669356994749940177394741882673528979787005053706368049835514900244303495954950709725762186311224148828811920216904542206960744666169364221195289538436845390250168663932838805192055137154390912666527533007309292687539092257043362517857366624699975402375462954490293259233303137330643531556539739921926201438606439020075174723029056838272505051571967594608350063404495977660656269020823960825567012344189908927956646011998057988548630107637380993519826582389781888135705408653045219655801758081251164080554609057468028203308718724654081055323215860189611391296030471108443146745671967766308925858547271507311563765171008318248647110097614890313562856541784154881743146033909602737947385055355960331855614540900081456378659068370317267696980001187750995491090350108417050917991562167972281070161305972518044872048331306383715094854938415738549894606070722584737978176686422134354526989443028353644037187375385397838259511833166416134323695660367676897722287918773420968982326089026150031515424165462111337527431154890666327374921446276833564519776797633875503548665093914556482031482248883127023777039667707976559857333357013727342079099064400455741830654320379350833236245819348824064783585692924881021978332974949906122664421376034687815350484991",
|
||||
|
||||
/* DR moduli */
|
||||
"14059105607947488696282932836518693308967803494693489478439861164411992439598399594747002144074658928593502845729752797260025831423419686528151609940203368612079",
|
||||
"101745825697019260773923519755878567461315282017759829107608914364075275235254395622580447400994175578963163918967182013639660669771108475957692810857098847138903161308502419410142185759152435680068435915159402496058513611411688900243039",
|
||||
"736335108039604595805923406147184530889923370574768772191969612422073040099331944991573923112581267542507986451953227192970402893063850485730703075899286013451337291468249027691733891486704001513279827771740183629161065194874727962517148100775228363421083691764065477590823919364012917984605619526140821797602431",
|
||||
"38564998830736521417281865696453025806593491967131023221754800625044118265468851210705360385717536794615180260494208076605798671660719333199513807806252394423283413430106003596332513246682903994829528690198205120921557533726473585751382193953592127439965050261476810842071573684505878854588706623484573925925903505747545471088867712185004135201289273405614415899438276535626346098904241020877974002916168099951885406379295536200413493190419727789712076165162175783",
|
||||
"542189391331696172661670440619180536749994166415993334151601745392193484590296600979602378676624808129613777993466242203025054573692562689251250471628358318743978285860720148446448885701001277560572526947619392551574490839286458454994488665744991822837769918095117129546414124448777033941223565831420390846864429504774477949153794689948747680362212954278693335653935890352619041936727463717926744868338358149568368643403037768649616778526013610493696186055899318268339432671541328195724261329606699831016666359440874843103020666106568222401047720269951530296879490444224546654729111504346660859907296364097126834834235287147",
|
||||
"1487259134814709264092032648525971038895865645148901180585340454985524155135260217788758027400478312256339496385275012465661575576202252063145698732079880294664220579764848767704076761853197216563262660046602703973050798218246170835962005598561669706844469447435461092542265792444947706769615695252256130901271870341005768912974433684521436211263358097522726462083917939091760026658925757076733484173202927141441492573799914240222628795405623953109131594523623353044898339481494120112723445689647986475279242446083151413667587008191682564376412347964146113898565886683139407005941383669325997475076910488086663256335689181157957571445067490187939553165903773554290260531009121879044170766615232300936675369451260747671432073394867530820527479172464106442450727640226503746586340279816318821395210726268291535648506190714616083163403189943334431056876038286530365757187367147446004855912033137386225053275419626102417236133948503",
|
||||
"1095121115716677802856811290392395128588168592409109494900178008967955253005183831872715423151551999734857184538199864469605657805519106717529655044054833197687459782636297255219742994736751541815269727940751860670268774903340296040006114013971309257028332849679096824800250742691718610670812374272414086863715763724622797509437062518082383056050144624962776302147890521249477060215148275163688301275847155316042279405557632639366066847442861422164832655874655824221577849928863023018366835675399949740429332468186340518172487073360822220449055340582568461568645259954873303616953776393853174845132081121976327462740354930744487429617202585015510744298530101547706821590188733515880733527449780963163909830077616357506845523215289297624086914545378511082534229620116563260168494523906566709418166011112754529766183554579321224940951177394088465596712620076240067370589036924024728375076210477267488679008016579588696191194060127319035195370137160936882402244399699172017835144537488486396906144217720028992863941288217185353914991583400421682751000603596655790990815525126154394344641336397793791497068253936771017031980867706707490224041075826337383538651825493679503771934836094655802776331664261631740148281763487765852746577808019633679",
|
||||
|
||||
/* generic unrestricted moduli */
|
||||
"17933601194860113372237070562165128350027320072176844226673287945873370751245439587792371960615073855669274087805055507977323024886880985062002853331424203",
|
||||
"2893527720709661239493896562339544088620375736490408468011883030469939904368086092336458298221245707898933583190713188177399401852627749210994595974791782790253946539043962213027074922559572312141181787434278708783207966459019479487",
|
||||
"347743159439876626079252796797422223177535447388206607607181663903045907591201940478223621722118173270898487582987137708656414344685816179420855160986340457973820182883508387588163122354089264395604796675278966117567294812714812796820596564876450716066283126720010859041484786529056457896367683122960411136319",
|
||||
"47266428956356393164697365098120418976400602706072312735924071745438532218237979333351774907308168340693326687317443721193266215155735814510792148768576498491199122744351399489453533553203833318691678263241941706256996197460424029012419012634671862283532342656309677173602509498417976091509154360039893165037637034737020327399910409885798185771003505320583967737293415979917317338985837385734747478364242020380416892056650841470869294527543597349250299539682430605173321029026555546832473048600327036845781970289288898317888427517364945316709081173840186150794397479045034008257793436817683392375274635794835245695887",
|
||||
"436463808505957768574894870394349739623346440601945961161254440072143298152040105676491048248110146278752857839930515766167441407021501229924721335644557342265864606569000117714935185566842453630868849121480179691838399545644365571106757731317371758557990781880691336695584799313313687287468894148823761785582982549586183756806449017542622267874275103877481475534991201849912222670102069951687572917937634467778042874315463238062009202992087620963771759666448266532858079402669920025224220613419441069718482837399612644978839925207109870840278194042158748845445131729137117098529028886770063736487420613144045836803985635654192482395882603511950547826439092832800532152534003936926017612446606135655146445620623395788978726744728503058670046885876251527122350275750995227",
|
||||
"11424167473351836398078306042624362277956429440521137061889702611766348760692206243140413411077394583180726863277012016602279290144126785129569474909173584789822341986742719230331946072730319555984484911716797058875905400999504305877245849119687509023232790273637466821052576859232452982061831009770786031785669030271542286603956118755585683996118896215213488875253101894663403069677745948305893849505434201763745232895780711972432011344857521691017896316861403206449421332243658855453435784006517202894181640562433575390821384210960117518650374602256601091379644034244332285065935413233557998331562749140202965844219336298970011513882564935538704289446968322281451907487362046511461221329799897350993370560697505809686438782036235372137015731304779072430260986460269894522159103008260495503005267165927542949439526272736586626709581721032189532726389643625590680105784844246152702670169304203783072275089194754889511973916207",
|
||||
"1214855636816562637502584060163403830270705000634713483015101384881871978446801224798536155406895823305035467591632531067547890948695117172076954220727075688048751022421198712032848890056357845974246560748347918630050853933697792254955890439720297560693579400297062396904306270145886830719309296352765295712183040773146419022875165382778007040109957609739589875590885701126197906063620133954893216612678838507540777138437797705602453719559017633986486649523611975865005712371194067612263330335590526176087004421363598470302731349138773205901447704682181517904064735636518462452242791676541725292378925568296858010151852326316777511935037531017413910506921922450666933202278489024521263798482237150056835746454842662048692127173834433089016107854491097456725016327709663199738238442164843147132789153725513257167915555162094970853584447993125488607696008169807374736711297007473812256272245489405898470297178738029484459690836250560495461579533254473316340608217876781986188705928270735695752830825527963838355419762516246028680280988020401914551825487349990306976304093109384451438813251211051597392127491464898797406789175453067960072008590614886532333015881171367104445044718144312416815712216611576221546455968770801413440778423979",
|
||||
NULL
|
||||
};
|
||||
log = fopen("logs/expt.log", "w");
|
||||
logb = fopen("logs/expt_dr.log", "w");
|
||||
logc = fopen("logs/expt_2k.log", "w");
|
||||
for (n = 0; primes[n]; n++) {
|
||||
SLEEP;
|
||||
mp_read_radix(&a, primes[n], 10);
|
||||
mp_zero(&b);
|
||||
for (rr = 0; rr < mp_count_bits(&a); rr++) {
|
||||
mp_mul_2(&b, &b);
|
||||
b.dp[0] |= lbit();
|
||||
b.used += 1;
|
||||
}
|
||||
mp_sub_d(&a, 1, &c);
|
||||
mp_mod(&b, &c, &b);
|
||||
mp_set(&c, 3);
|
||||
reset();
|
||||
rr = 0;
|
||||
do {
|
||||
DO(mp_exptmod(&c, &b, &a, &d));
|
||||
rr += 16;
|
||||
} while (rdtsc() < (CLK_PER_SEC * 2));
|
||||
tt = rdtsc();
|
||||
mp_sub_d(&a, 1, &e);
|
||||
mp_sub(&e, &b, &b);
|
||||
mp_exptmod(&c, &b, &a, &e); /* c^(p-1-b) mod a */
|
||||
mp_mulmod(&e, &d, &a, &d); /* c^b * c^(p-1-b) == c^p-1 == 1 */
|
||||
if (mp_cmp_d(&d, 1)) {
|
||||
printf("Different (%d)!!!\n", mp_count_bits(&a));
|
||||
draw(&d);
|
||||
exit(0);
|
||||
}
|
||||
printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
|
||||
fprintf((n < 6) ? logc : (n < 13) ? logb : log, "%d %9llu\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt);
|
||||
}
|
||||
}
|
||||
fclose(log);
|
||||
fclose(logb);
|
||||
fclose(logc);
|
||||
|
||||
log = fopen("logs/invmod.log", "w");
|
||||
for (cnt = 4; cnt <= 128; cnt += 4) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
mp_rand(&b, cnt);
|
||||
|
||||
do {
|
||||
mp_add_d(&b, 1, &b);
|
||||
mp_gcd(&a, &b, &c);
|
||||
} while (mp_cmp_d(&c, 1) != MP_EQ);
|
||||
|
||||
reset();
|
||||
rr = 0;
|
||||
do {
|
||||
DO(mp_invmod(&b, &a, &c));
|
||||
rr += 16;
|
||||
} while (rdtsc() < (CLK_PER_SEC * 2));
|
||||
tt = rdtsc();
|
||||
mp_mulmod(&b, &c, &a, &d);
|
||||
if (mp_cmp_d(&d, 1) != MP_EQ) {
|
||||
printf("Failed to invert\n");
|
||||
return 0;
|
||||
}
|
||||
printf("Inverting mod\t%4d-bit => %9llu/sec, %9llu ticks\n", mp_count_bits(&a), (((ulong64)rr)*CLK_PER_SEC)/tt, tt);
|
||||
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, (((ulong64)rr)*CLK_PER_SEC)/tt);
|
||||
}
|
||||
fclose(log);
|
||||
|
||||
return 0;
|
||||
|
||||
#endif
|
||||
|
||||
div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n =
|
||||
sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = add_d_n = sub_d_n= 0;
|
||||
|
291
demo/timing.c
Normal file
@ -0,0 +1,291 @@
|
||||
#include <tommath.h>
|
||||
#include <time.h>
|
||||
|
||||
ulong64 _tt;
|
||||
|
||||
#ifdef IOWNANATHLON
|
||||
#include <unistd.h>
|
||||
#define SLEEP sleep(4)
|
||||
#else
|
||||
#define SLEEP
|
||||
#endif
|
||||
|
||||
|
||||
/* Print "<name>: <value>\n" to stdout, with the value of a rendered in radix 64.
 *
 * a    - the mp_int to print
 * name - label printed in front of the value
 *
 * The conversion goes through mp_toradix_n() so it can never write past the
 * end of the local buffer; values needing more than sizeof(buf) - 1
 * characters are truncated by that routine instead of overflowing the stack.
 * If the conversion fails, an empty value is printed rather than whatever
 * happened to be in the buffer.
 */
void ndraw(mp_int *a, char *name)
{
   char buf[4096];

   printf("%s: ", name);
   if (mp_toradix_n(a, buf, 64, (int)sizeof(buf)) != MP_OKAY) {
      buf[0] = '\0';
   }
   printf("%s\n", buf);
}
|
||||
|
||||
/* Shorthand for ndraw() with an empty label: prints ": <value>\n". */
static void draw(mp_int *a)
{
   char no_label[] = "";   /* ndraw() takes a non-const char * */

   ndraw(a, no_label);
}
|
||||
|
||||
|
||||
/* State of the bit generator used by lbit(); kept within the low 32 bits. */
unsigned long lfsr = 0xAAAAAAAAUL;

/* Return the next generator bit (0 or 1) and advance the state.
 *
 * The output is bit 31 of the current state.  The state is then shifted
 * left by one; when the output bit was set, the shifted state is also
 * XORed with 0x8000001B and masked back down to 32 bits.
 */
int lbit(void)
{
   const int out = (lfsr & 0x80000000UL) != 0;

   lfsr <<= 1;
   if (out) {
      lfsr = (lfsr ^ 0x8000001BUL) & 0xFFFFFFFFUL;
   }
   return out;
}
|
||||
|
||||
/*
 * TIMFUNC: the tick source used by the benchmarks in this file.
 * When one of the three macros tested on the next line is defined, TIMFUNC
 * is a static function that reads a CPU counter through inline asm or a
 * compiler intrinsic; otherwise (final #else below) it is simply an alias
 * for the C library clock().
 */
#if defined(__i386__) || defined(_M_IX86) || defined(_M_AMD64)
|
||||
/* RDTSC from Scott Duplichan */
|
||||
static ulong64 TIMFUNC (void)
|
||||
{
|
||||
/* GCC-style inline asm paths */
#if defined __GNUC__
|
||||
#ifdef __i386__
|
||||
ulong64 a;
|
||||
/* GCC's "=A" constraint binds a to the EDX:EAX register pair written by rdtsc */
__asm__ __volatile__ ("rdtsc ":"=A" (a));
|
||||
return a;
|
||||
/* NOTE(review): the outer #if only admits __i386__, _M_IX86 or _M_AMD64, so it
 * is unclear when a GCC build would reach this IA64 path - confirm. */
#else /* gcc-IA64 version */
|
||||
unsigned long result;
|
||||
/* read the ar.itc register into result */
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
|
||||
/* re-read for as long as the value, converted to int, equals -1
 * (presumably a guard against a bad read - confirm) */
while (__builtin_expect ((int) result == -1, 0))
|
||||
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
|
||||
return result;
|
||||
#endif
|
||||
|
||||
// Microsoft and Intel Windows compilers
|
||||
/* NOTE(review): this branch has no return statement; presumably it relies on
 * rdtsc leaving its result in the registers used for the return value - confirm. */
#elif defined _M_IX86
|
||||
__asm rdtsc
|
||||
#elif defined _M_AMD64
|
||||
return __rdtsc ();
|
||||
/* NOTE(review): _M_IA64 is not part of the outer guard, so this branch looks
 * unreachable unless one of the guarded macros is defined as well - confirm. */
#elif defined _M_IA64
|
||||
#if defined __INTEL_COMPILER
|
||||
#include <ia64intrin.h>
|
||||
#endif
|
||||
return __getReg (3116);
|
||||
/* none of the compilers handled above: stop the build */
#else
|
||||
#error need rdtsc function for this build
|
||||
#endif
|
||||
}
|
||||
/* none of the guarded macros is defined: fall back to clock() */
#else
|
||||
#define TIMFUNC clock
|
||||
#endif
|
||||
|
||||
/* Execute statement x twice back to back.  The timing loops in main() shift
 * the measured tick count right by one to get the cost of a single call, so
 * the repeat count here must stay at two.
 *
 * Wrapped in do { } while (0) so that "DO(stmt);" is exactly one statement
 * and remains correct under an unbraced if/else.
 */
#define DO(x) do { x; x; } while (0)
|
||||
|
||||
int main(void)
|
||||
{
|
||||
ulong64 tt, gg, CLK_PER_SEC;
|
||||
FILE *log, *logb, *logc;
|
||||
mp_int a, b, c, d, e, f;
|
||||
int n, cnt, ix, old_kara_m, old_kara_s;
|
||||
unsigned rr;
|
||||
|
||||
mp_init(&a);
|
||||
mp_init(&b);
|
||||
mp_init(&c);
|
||||
mp_init(&d);
|
||||
mp_init(&e);
|
||||
mp_init(&f);
|
||||
|
||||
srand(time(NULL));
|
||||
|
||||
|
||||
/* temp. turn off TOOM */
|
||||
TOOM_MUL_CUTOFF = TOOM_SQR_CUTOFF = 100000;
|
||||
|
||||
CLK_PER_SEC = TIMFUNC();
|
||||
sleep(1);
|
||||
CLK_PER_SEC = TIMFUNC() - CLK_PER_SEC;
|
||||
|
||||
printf("CLK_PER_SEC == %llu\n", CLK_PER_SEC);
|
||||
|
||||
log = fopen("logs/add.log", "w");
|
||||
for (cnt = 8; cnt <= 128; cnt += 8) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
mp_rand(&b, cnt);
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_add(&a,&b,&c));
|
||||
gg = (TIMFUNC() - gg)>>1;
|
||||
if (tt > gg) tt = gg;
|
||||
} while (++rr < 100000);
|
||||
printf("Adding\t\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
|
||||
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, tt); fflush(log);
|
||||
}
|
||||
fclose(log);
|
||||
|
||||
log = fopen("logs/sub.log", "w");
|
||||
for (cnt = 8; cnt <= 128; cnt += 8) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
mp_rand(&b, cnt);
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_sub(&a,&b,&c));
|
||||
gg = (TIMFUNC() - gg)>>1;
|
||||
if (tt > gg) tt = gg;
|
||||
} while (++rr < 100000);
|
||||
|
||||
printf("Subtracting\t\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
|
||||
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, tt); fflush(log);
|
||||
}
|
||||
fclose(log);
|
||||
|
||||
/* do mult/square twice, first without karatsuba and second with */
|
||||
old_kara_m = KARATSUBA_MUL_CUTOFF;
|
||||
old_kara_s = KARATSUBA_SQR_CUTOFF;
|
||||
for (ix = 0; ix < 1; ix++) {
|
||||
printf("With%s Karatsuba\n", (ix==0)?"out":"");
|
||||
|
||||
KARATSUBA_MUL_CUTOFF = (ix==0)?9999:old_kara_m;
|
||||
KARATSUBA_SQR_CUTOFF = (ix==0)?9999:old_kara_s;
|
||||
|
||||
log = fopen((ix==0)?"logs/mult.log":"logs/mult_kara.log", "w");
|
||||
for (cnt = 32; cnt <= 288; cnt += 8) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
mp_rand(&b, cnt);
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_mul(&a, &b, &c));
|
||||
gg = (TIMFUNC() - gg)>>1;
|
||||
if (tt > gg) tt = gg;
|
||||
} while (++rr < 100);
|
||||
printf("Multiplying\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
|
||||
fprintf(log, "%d %9llu\n", mp_count_bits(&a), tt); fflush(log);
|
||||
}
|
||||
fclose(log);
|
||||
|
||||
log = fopen((ix==0)?"logs/sqr.log":"logs/sqr_kara.log", "w");
|
||||
for (cnt = 32; cnt <= 288; cnt += 8) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_sqr(&a, &b));
|
||||
gg = (TIMFUNC() - gg)>>1;
|
||||
if (tt > gg) tt = gg;
|
||||
} while (++rr < 100);
|
||||
printf("Squaring\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
|
||||
fprintf(log, "%d %9llu\n", mp_count_bits(&a), tt); fflush(log);
|
||||
}
|
||||
fclose(log);
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
char *primes[] = {
|
||||
/* 2K moduli mersenne primes */
|
||||
"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151",
|
||||
"531137992816767098689588206552468627329593117727031923199444138200403559860852242739162502265229285668889329486246501015346579337652707239409519978766587351943831270835393219031728127",
|
||||
"10407932194664399081925240327364085538615262247266704805319112350403608059673360298012239441732324184842421613954281007791383566248323464908139906605677320762924129509389220345773183349661583550472959420547689811211693677147548478866962501384438260291732348885311160828538416585028255604666224831890918801847068222203140521026698435488732958028878050869736186900714720710555703168729087",
|
||||
"1475979915214180235084898622737381736312066145333169775147771216478570297878078949377407337049389289382748507531496480477281264838760259191814463365330269540496961201113430156902396093989090226259326935025281409614983499388222831448598601834318536230923772641390209490231836446899608210795482963763094236630945410832793769905399982457186322944729636418890623372171723742105636440368218459649632948538696905872650486914434637457507280441823676813517852099348660847172579408422316678097670224011990280170474894487426924742108823536808485072502240519452587542875349976558572670229633962575212637477897785501552646522609988869914013540483809865681250419497686697771007",
|
||||
"259117086013202627776246767922441530941818887553125427303974923161874019266586362086201209516800483406550695241733194177441689509238807017410377709597512042313066624082916353517952311186154862265604547691127595848775610568757931191017711408826252153849035830401185072116424747461823031471398340229288074545677907941037288235820705892351068433882986888616658650280927692080339605869308790500409503709875902119018371991620994002568935113136548829739112656797303241986517250116412703509705427773477972349821676443446668383119322540099648994051790241624056519054483690809616061625743042361721863339415852426431208737266591962061753535748892894599629195183082621860853400937932839420261866586142503251450773096274235376822938649407127700846077124211823080804139298087057504713825264571448379371125032081826126566649084251699453951887789613650248405739378594599444335231188280123660406262468609212150349937584782292237144339628858485938215738821232393687046160677362909315071",
|
||||
"190797007524439073807468042969529173669356994749940177394741882673528979787005053706368049835514900244303495954950709725762186311224148828811920216904542206960744666169364221195289538436845390250168663932838805192055137154390912666527533007309292687539092257043362517857366624699975402375462954490293259233303137330643531556539739921926201438606439020075174723029056838272505051571967594608350063404495977660656269020823960825567012344189908927956646011998057988548630107637380993519826582389781888135705408653045219655801758081251164080554609057468028203308718724654081055323215860189611391296030471108443146745671967766308925858547271507311563765171008318248647110097614890313562856541784154881743146033909602737947385055355960331855614540900081456378659068370317267696980001187750995491090350108417050917991562167972281070161305972518044872048331306383715094854938415738549894606070722584737978176686422134354526989443028353644037187375385397838259511833166416134323695660367676897722287918773420968982326089026150031515424165462111337527431154890666327374921446276833564519776797633875503548665093914556482031482248883127023777039667707976559857333357013727342079099064400455741830654320379350833236245819348824064783585692924881021978332974949906122664421376034687815350484991",
|
||||
|
||||
/* DR moduli */
|
||||
"14059105607947488696282932836518693308967803494693489478439861164411992439598399594747002144074658928593502845729752797260025831423419686528151609940203368612079",
|
||||
"101745825697019260773923519755878567461315282017759829107608914364075275235254395622580447400994175578963163918967182013639660669771108475957692810857098847138903161308502419410142185759152435680068435915159402496058513611411688900243039",
|
||||
"736335108039604595805923406147184530889923370574768772191969612422073040099331944991573923112581267542507986451953227192970402893063850485730703075899286013451337291468249027691733891486704001513279827771740183629161065194874727962517148100775228363421083691764065477590823919364012917984605619526140821797602431",
|
||||
"38564998830736521417281865696453025806593491967131023221754800625044118265468851210705360385717536794615180260494208076605798671660719333199513807806252394423283413430106003596332513246682903994829528690198205120921557533726473585751382193953592127439965050261476810842071573684505878854588706623484573925925903505747545471088867712185004135201289273405614415899438276535626346098904241020877974002916168099951885406379295536200413493190419727789712076165162175783",
|
||||
"542189391331696172661670440619180536749994166415993334151601745392193484590296600979602378676624808129613777993466242203025054573692562689251250471628358318743978285860720148446448885701001277560572526947619392551574490839286458454994488665744991822837769918095117129546414124448777033941223565831420390846864429504774477949153794689948747680362212954278693335653935890352619041936727463717926744868338358149568368643403037768649616778526013610493696186055899318268339432671541328195724261329606699831016666359440874843103020666106568222401047720269951530296879490444224546654729111504346660859907296364097126834834235287147",
|
||||
"1487259134814709264092032648525971038895865645148901180585340454985524155135260217788758027400478312256339496385275012465661575576202252063145698732079880294664220579764848767704076761853197216563262660046602703973050798218246170835962005598561669706844469447435461092542265792444947706769615695252256130901271870341005768912974433684521436211263358097522726462083917939091760026658925757076733484173202927141441492573799914240222628795405623953109131594523623353044898339481494120112723445689647986475279242446083151413667587008191682564376412347964146113898565886683139407005941383669325997475076910488086663256335689181157957571445067490187939553165903773554290260531009121879044170766615232300936675369451260747671432073394867530820527479172464106442450727640226503746586340279816318821395210726268291535648506190714616083163403189943334431056876038286530365757187367147446004855912033137386225053275419626102417236133948503",
|
||||
"1095121115716677802856811290392395128588168592409109494900178008967955253005183831872715423151551999734857184538199864469605657805519106717529655044054833197687459782636297255219742994736751541815269727940751860670268774903340296040006114013971309257028332849679096824800250742691718610670812374272414086863715763724622797509437062518082383056050144624962776302147890521249477060215148275163688301275847155316042279405557632639366066847442861422164832655874655824221577849928863023018366835675399949740429332468186340518172487073360822220449055340582568461568645259954873303616953776393853174845132081121976327462740354930744487429617202585015510744298530101547706821590188733515880733527449780963163909830077616357506845523215289297624086914545378511082534229620116563260168494523906566709418166011112754529766183554579321224940951177394088465596712620076240067370589036924024728375076210477267488679008016579588696191194060127319035195370137160936882402244399699172017835144537488486396906144217720028992863941288217185353914991583400421682751000603596655790990815525126154394344641336397793791497068253936771017031980867706707490224041075826337383538651825493679503771934836094655802776331664261631740148281763487765852746577808019633679",
|
||||
|
||||
/* generic unrestricted moduli */
|
||||
"17933601194860113372237070562165128350027320072176844226673287945873370751245439587792371960615073855669274087805055507977323024886880985062002853331424203",
|
||||
"2893527720709661239493896562339544088620375736490408468011883030469939904368086092336458298221245707898933583190713188177399401852627749210994595974791782790253946539043962213027074922559572312141181787434278708783207966459019479487",
|
||||
"347743159439876626079252796797422223177535447388206607607181663903045907591201940478223621722118173270898487582987137708656414344685816179420855160986340457973820182883508387588163122354089264395604796675278966117567294812714812796820596564876450716066283126720010859041484786529056457896367683122960411136319",
|
||||
"47266428956356393164697365098120418976400602706072312735924071745438532218237979333351774907308168340693326687317443721193266215155735814510792148768576498491199122744351399489453533553203833318691678263241941706256996197460424029012419012634671862283532342656309677173602509498417976091509154360039893165037637034737020327399910409885798185771003505320583967737293415979917317338985837385734747478364242020380416892056650841470869294527543597349250299539682430605173321029026555546832473048600327036845781970289288898317888427517364945316709081173840186150794397479045034008257793436817683392375274635794835245695887",
|
||||
"436463808505957768574894870394349739623346440601945961161254440072143298152040105676491048248110146278752857839930515766167441407021501229924721335644557342265864606569000117714935185566842453630868849121480179691838399545644365571106757731317371758557990781880691336695584799313313687287468894148823761785582982549586183756806449017542622267874275103877481475534991201849912222670102069951687572917937634467778042874315463238062009202992087620963771759666448266532858079402669920025224220613419441069718482837399612644978839925207109870840278194042158748845445131729137117098529028886770063736487420613144045836803985635654192482395882603511950547826439092832800532152534003936926017612446606135655146445620623395788978726744728503058670046885876251527122350275750995227",
|
||||
"11424167473351836398078306042624362277956429440521137061889702611766348760692206243140413411077394583180726863277012016602279290144126785129569474909173584789822341986742719230331946072730319555984484911716797058875905400999504305877245849119687509023232790273637466821052576859232452982061831009770786031785669030271542286603956118755585683996118896215213488875253101894663403069677745948305893849505434201763745232895780711972432011344857521691017896316861403206449421332243658855453435784006517202894181640562433575390821384210960117518650374602256601091379644034244332285065935413233557998331562749140202965844219336298970011513882564935538704289446968322281451907487362046511461221329799897350993370560697505809686438782036235372137015731304779072430260986460269894522159103008260495503005267165927542949439526272736586626709581721032189532726389643625590680105784844246152702670169304203783072275089194754889511973916207",
|
||||
"1214855636816562637502584060163403830270705000634713483015101384881871978446801224798536155406895823305035467591632531067547890948695117172076954220727075688048751022421198712032848890056357845974246560748347918630050853933697792254955890439720297560693579400297062396904306270145886830719309296352765295712183040773146419022875165382778007040109957609739589875590885701126197906063620133954893216612678838507540777138437797705602453719559017633986486649523611975865005712371194067612263330335590526176087004421363598470302731349138773205901447704682181517904064735636518462452242791676541725292378925568296858010151852326316777511935037531017413910506921922450666933202278489024521263798482237150056835746454842662048692127173834433089016107854491097456725016327709663199738238442164843147132789153725513257167915555162094970853584447993125488607696008169807374736711297007473812256272245489405898470297178738029484459690836250560495461579533254473316340608217876781986188705928270735695752830825527963838355419762516246028680280988020401914551825487349990306976304093109384451438813251211051597392127491464898797406789175453067960072008590614886532333015881171367104445044718144312416815712216611576221546455968770801413440778423979",
|
||||
NULL
|
||||
};
|
||||
log = fopen("logs/expt.log", "w");
|
||||
logb = fopen("logs/expt_dr.log", "w");
|
||||
logc = fopen("logs/expt_2k.log", "w");
|
||||
for (n = 0; primes[n]; n++) {
|
||||
SLEEP;
|
||||
mp_read_radix(&a, primes[n], 10);
|
||||
mp_zero(&b);
|
||||
for (rr = 0; rr < (unsigned)mp_count_bits(&a); rr++) {
|
||||
mp_mul_2(&b, &b);
|
||||
b.dp[0] |= lbit();
|
||||
b.used += 1;
|
||||
}
|
||||
mp_sub_d(&a, 1, &c);
|
||||
mp_mod(&b, &c, &b);
|
||||
mp_set(&c, 3);
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_exptmod(&c, &b, &a, &d));
|
||||
gg = (TIMFUNC() - gg)>>1;
|
||||
if (tt > gg) tt = gg;
|
||||
} while (++rr < 10);
|
||||
mp_sub_d(&a, 1, &e);
|
||||
mp_sub(&e, &b, &b);
|
||||
mp_exptmod(&c, &b, &a, &e); /* c^(p-1-b) mod a */
|
||||
mp_mulmod(&e, &d, &a, &d); /* c^b * c^(p-1-b) == c^p-1 == 1 */
|
||||
if (mp_cmp_d(&d, 1)) {
|
||||
printf("Different (%d)!!!\n", mp_count_bits(&a));
|
||||
draw(&d);
|
||||
exit(0);
|
||||
}
|
||||
printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
|
||||
fprintf((n < 6) ? logc : (n < 13) ? logb : log, "%d %9llu\n", mp_count_bits(&a), tt);
|
||||
}
|
||||
}
|
||||
fclose(log);
|
||||
fclose(logb);
|
||||
fclose(logc);
|
||||
|
||||
log = fopen("logs/invmod.log", "w");
|
||||
for (cnt = 4; cnt <= 128; cnt += 4) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
mp_rand(&b, cnt);
|
||||
|
||||
do {
|
||||
mp_add_d(&b, 1, &b);
|
||||
mp_gcd(&a, &b, &c);
|
||||
} while (mp_cmp_d(&c, 1) != MP_EQ);
|
||||
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_invmod(&b, &a, &c));
|
||||
gg = (TIMFUNC() - gg)>>1;
|
||||
if (tt > gg) tt = gg;
|
||||
} while (++rr < 1000);
|
||||
mp_mulmod(&b, &c, &a, &d);
|
||||
if (mp_cmp_d(&d, 1) != MP_EQ) {
|
||||
printf("Failed to invert\n");
|
||||
return 0;
|
||||
}
|
||||
printf("Inverting mod\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
|
||||
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, tt);
|
||||
}
|
||||
fclose(log);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -46,4 +46,5 @@ mont: mont.o
|
||||
|
||||
|
||||
clean:
|
||||
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime pprime.dat
|
||||
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime pprime.dat \
|
||||
*.da *.dyn *.dpi *~
|
||||
|
67
etc/makefile.icc
Normal file
@ -0,0 +1,67 @@
|
||||
CC = icc
|
||||
|
||||
CFLAGS += -I../
|
||||
|
||||
# optimize for SPEED
|
||||
#
|
||||
# -mcpu= can be pentium, pentiumpro (covers PII through PIII) or pentium4
|
||||
# -ax? specifies make code specifically for ? but compatible with IA-32
|
||||
# -x? specifies compile solely for ? [not specifically IA-32 compatible]
|
||||
#
|
||||
# where ? is
|
||||
# K - PIII
|
||||
# W - first P4 [Willamette]
|
||||
# N - P4 Northwood
|
||||
# P - P4 Prescott
|
||||
# B - Blend of P4 and PM [mobile]
|
||||
#
|
||||
# Default to max optimization, compiled solely for the P4 Northwood (-xN)
|
||||
CFLAGS += -O3 -xN -ip
|
||||
|
||||
# default lib name (requires install with root)
|
||||
# LIBNAME=-ltommath
|
||||
|
||||
# libname when you can't install the lib with install
|
||||
LIBNAME=../libtommath.a
|
||||
|
||||
#provable primes
|
||||
pprime: pprime.o
|
||||
$(CC) pprime.o $(LIBNAME) -o pprime
|
||||
|
||||
# portable [well requires clock()] tuning app
|
||||
tune: tune.o
|
||||
$(CC) tune.o $(LIBNAME) -o tune
|
||||
|
||||
# same app but using RDTSC for higher precision [requires 80586+], coff based gcc installs [e.g. ming, cygwin, djgpp]
|
||||
tune86: tune.c
|
||||
nasm -f coff timer.asm
|
||||
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86
|
||||
|
||||
# for cygwin
|
||||
tune86c: tune.c
|
||||
nasm -f gnuwin32 timer.asm
|
||||
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86
|
||||
|
||||
#make tune86 for linux or any ELF format
|
||||
tune86l: tune.c
|
||||
nasm -f elf -DUSE_ELF timer.asm
|
||||
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86l
|
||||
|
||||
# spits out mersenne primes
|
||||
mersenne: mersenne.o
|
||||
$(CC) mersenne.o $(LIBNAME) -o mersenne
|
||||
|
||||
# finds DR safe primes for the given config
|
||||
drprime: drprime.o
|
||||
$(CC) drprime.o $(LIBNAME) -o drprime
|
||||
|
||||
# finds 2k safe primes for the given config
|
||||
2kprime: 2kprime.o
|
||||
$(CC) 2kprime.o $(LIBNAME) -o 2kprime
|
||||
|
||||
mont: mont.o
|
||||
$(CC) mont.o $(LIBNAME) -o mont
|
||||
|
||||
|
||||
clean:
|
||||
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime pprime.dat *.il
|
32
logs/add.log
@ -1,16 +1,16 @@
|
||||
224 20297071
|
||||
448 15151383
|
||||
672 13088682
|
||||
896 11111587
|
||||
1120 9240621
|
||||
1344 8221878
|
||||
1568 7227434
|
||||
1792 6718051
|
||||
2016 6042524
|
||||
2240 5685200
|
||||
2464 5240465
|
||||
2688 4818032
|
||||
2912 4412794
|
||||
3136 4155883
|
||||
3360 3927078
|
||||
3584 3722138
|
||||
224 1572
|
||||
448 1740
|
||||
672 1902
|
||||
896 2116
|
||||
1120 2324
|
||||
1344 2484
|
||||
1568 2548
|
||||
1792 2772
|
||||
2016 2958
|
||||
2240 3058
|
||||
2464 3276
|
||||
2688 3436
|
||||
2912 3542
|
||||
3136 3702
|
||||
3360 3926
|
||||
3584 4074
|
||||
|
BIN
logs/addsub.png
Before Width: | Height: | Size: 6.9 KiB After Width: | Height: | Size: 6.1 KiB |
@ -1,7 +1,7 @@
|
||||
513 745
|
||||
769 282
|
||||
1025 130
|
||||
2049 20
|
||||
2561 11
|
||||
3073 6
|
||||
4097 2
|
||||
513 19933908
|
||||
769 55707832
|
||||
1025 119872576
|
||||
2049 856114218
|
||||
2561 1602741360
|
||||
3073 2718192748
|
||||
4097 6264335828
|
||||
|
BIN
logs/expt.png
Before Width: | Height: | Size: 7.3 KiB After Width: | Height: | Size: 6.5 KiB |
@ -1,6 +1,6 @@
|
||||
521 783
|
||||
607 585
|
||||
1279 138
|
||||
2203 39
|
||||
3217 15
|
||||
4253 6
|
||||
521 18847776
|
||||
607 24665920
|
||||
1279 110036220
|
||||
2203 414562036
|
||||
3217 1108350966
|
||||
4253 2286079370
|
||||
|
@ -1,7 +1,7 @@
|
||||
532 1296
|
||||
784 551
|
||||
1036 283
|
||||
1540 109
|
||||
2072 52
|
||||
3080 18
|
||||
4116 7
|
||||
532 9656134
|
||||
784 23022274
|
||||
1036 45227854
|
||||
1540 129652848
|
||||
2072 280625626
|
||||
3080 845619480
|
||||
4116 1866206400
|
||||
|
@ -1,17 +1,17 @@
|
||||
set terminal png
|
||||
set size 1.75
|
||||
set ylabel "Operations per Second"
|
||||
set xlabel "Operand size (bits)"
|
||||
|
||||
set output "addsub.png"
|
||||
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
|
||||
|
||||
set output "mult.png"
|
||||
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
|
||||
|
||||
set output "expt.png"
|
||||
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)", 'expt_2k.log' smooth bezier title "Exptmod (2k Reduction)"
|
||||
|
||||
set output "invmod.png"
|
||||
plot 'invmod.log' smooth bezier title "Modular Inverse"
|
||||
|
||||
set terminal png
|
||||
set size 1.75
|
||||
set ylabel "Cycles per Operation"
|
||||
set xlabel "Operand size (bits)"
|
||||
|
||||
set output "addsub.png"
|
||||
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
|
||||
|
||||
set output "mult.png"
|
||||
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
|
||||
|
||||
set output "expt.png"
|
||||
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)", 'expt_2k.log' smooth bezier title "Exptmod (2k Reduction)"
|
||||
|
||||
set output "invmod.png"
|
||||
plot 'invmod.log' smooth bezier title "Modular Inverse"
|
||||
|
||||
|
@ -1,32 +0,0 @@
|
||||
112 17364
|
||||
224 8643
|
||||
336 8867
|
||||
448 6228
|
||||
560 4737
|
||||
672 2259
|
||||
784 2899
|
||||
896 1497
|
||||
1008 1238
|
||||
1120 1010
|
||||
1232 870
|
||||
1344 1265
|
||||
1456 1102
|
||||
1568 981
|
||||
1680 539
|
||||
1792 484
|
||||
1904 722
|
||||
2016 392
|
||||
2128 604
|
||||
2240 551
|
||||
2352 511
|
||||
2464 469
|
||||
2576 263
|
||||
2688 247
|
||||
2800 227
|
||||
2912 354
|
||||
3024 336
|
||||
3136 312
|
||||
3248 296
|
||||
3360 166
|
||||
3472 155
|
||||
3584 248
|
BIN
logs/invmod.png
Before Width: | Height: | Size: 5.6 KiB After Width: | Height: | Size: 4.8 KiB |
@ -1,13 +0,0 @@
|
||||
To use the pretty graphs you have to first build/run the ltmtest from the root directory of the package.
|
||||
To do this, type
|
||||
|
||||
make timing ; ltmtest
|
||||
|
||||
in the root. It will run for a while [about ten minutes on most PCs] and produce a series of .log files in logs/.
|
||||
|
||||
After doing that, run "gnuplot graphs.dem" to make the PNGs. If you have managed to do all of that so far, just open index.html to view
|
||||
them all :-)
|
||||
|
||||
Have fun
|
||||
|
||||
Tom
|
@ -1,16 +0,0 @@
|
||||
224 11069160
|
||||
448 9156136
|
||||
672 8089755
|
||||
896 7399424
|
||||
1120 6389352
|
||||
1344 5818648
|
||||
1568 5257112
|
||||
1792 4982160
|
||||
2016 4527856
|
||||
2240 4325312
|
||||
2464 4051760
|
||||
2688 3767640
|
||||
2912 3612520
|
||||
3136 3415208
|
||||
3360 3258656
|
||||
3584 3113360
|
Before Width: | Height: | Size: 6.8 KiB |
@ -1,7 +0,0 @@
|
||||
513 664
|
||||
769 256
|
||||
1025 117
|
||||
2049 17
|
||||
2561 9
|
||||
3073 5
|
||||
4097 2
|
BIN
logs/k7/expt.png
Before Width: | Height: | Size: 6.3 KiB |
@ -1,7 +0,0 @@
|
||||
532 1088
|
||||
784 460
|
||||
1036 240
|
||||
1540 92
|
||||
2072 43
|
||||
3080 15
|
||||
4116 6
|
@ -1,17 +0,0 @@
|
||||
set terminal png color
|
||||
set size 1.75
|
||||
set ylabel "Operations per Second"
|
||||
set xlabel "Operand size (bits)"
|
||||
|
||||
set output "addsub.png"
|
||||
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
|
||||
|
||||
set output "mult.png"
|
||||
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
|
||||
|
||||
set output "expt.png"
|
||||
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)"
|
||||
|
||||
set output "invmod.png"
|
||||
plot 'invmod.log' smooth bezier title "Modular Inverse"
|
||||
|
@ -1,24 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>LibTomMath Log Plots</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Addition and Subtraction</h1>
|
||||
<center><img src=addsub.png></center>
|
||||
<hr>
|
||||
|
||||
<h1>Multipliers</h1>
|
||||
<center><img src=mult.png></center>
|
||||
<hr>
|
||||
|
||||
<h1>Exptmod</h1>
|
||||
<center><img src=expt.png></center>
|
||||
<hr>
|
||||
|
||||
<h1>Modular Inverse</h1>
|
||||
<center><img src=invmod.png></center>
|
||||
<hr>
|
||||
|
||||
</body>
|
||||
</html>
|
@ -1,32 +0,0 @@
|
||||
112 16248
|
||||
224 8192
|
||||
336 5320
|
||||
448 3560
|
||||
560 2728
|
||||
672 2064
|
||||
784 1704
|
||||
896 2176
|
||||
1008 1184
|
||||
1120 976
|
||||
1232 1280
|
||||
1344 1176
|
||||
1456 624
|
||||
1568 912
|
||||
1680 504
|
||||
1792 452
|
||||
1904 658
|
||||
2016 608
|
||||
2128 336
|
||||
2240 312
|
||||
2352 288
|
||||
2464 264
|
||||
2576 408
|
||||
2688 376
|
||||
2800 354
|
||||
2912 198
|
||||
3024 307
|
||||
3136 173
|
||||
3248 162
|
||||
3360 256
|
||||
3472 145
|
||||
3584 226
|
Before Width: | Height: | Size: 5.6 KiB |
@ -1,17 +0,0 @@
|
||||
896 322904
|
||||
1344 151592
|
||||
1792 90472
|
||||
2240 59984
|
||||
2688 42624
|
||||
3136 31872
|
||||
3584 24704
|
||||
4032 19704
|
||||
4480 16096
|
||||
4928 13376
|
||||
5376 11272
|
||||
5824 9616
|
||||
6272 8360
|
||||
6720 7304
|
||||
7168 1664
|
||||
7616 1472
|
||||
8064 1328
|
BIN
logs/k7/mult.png
Before Width: | Height: | Size: 8.1 KiB |
@ -1,17 +0,0 @@
|
||||
896 322872
|
||||
1344 151688
|
||||
1792 90480
|
||||
2240 59984
|
||||
2688 42656
|
||||
3136 32144
|
||||
3584 25840
|
||||
4032 21328
|
||||
4480 17856
|
||||
4928 14928
|
||||
5376 12856
|
||||
5824 11256
|
||||
6272 9880
|
||||
6720 8984
|
||||
7168 7928
|
||||
7616 7200
|
||||
8064 6576
|
@ -1,17 +0,0 @@
|
||||
896 415472
|
||||
1344 223736
|
||||
1792 141232
|
||||
2240 97624
|
||||
2688 71400
|
||||
3136 54800
|
||||
3584 16904
|
||||
4032 13528
|
||||
4480 10968
|
||||
4928 9128
|
||||
5376 7784
|
||||
5824 6672
|
||||
6272 5760
|
||||
6720 5056
|
||||
7168 4440
|
||||
7616 3952
|
||||
8064 3512
|
@ -1,17 +0,0 @@
|
||||
896 420464
|
||||
1344 224800
|
||||
1792 142808
|
||||
2240 97704
|
||||
2688 71416
|
||||
3136 54504
|
||||
3584 38320
|
||||
4032 32360
|
||||
4480 27576
|
||||
4928 23840
|
||||
5376 20688
|
||||
5824 18264
|
||||
6272 16176
|
||||
6720 14440
|
||||
7168 11688
|
||||
7616 10752
|
||||
8064 9936
|
@ -1,16 +0,0 @@
|
||||
224 9728504
|
||||
448 8573648
|
||||
672 7488096
|
||||
896 6714064
|
||||
1120 5950472
|
||||
1344 5457400
|
||||
1568 5038896
|
||||
1792 4683632
|
||||
2016 4384656
|
||||
2240 4105976
|
||||
2464 3871608
|
||||
2688 3650680
|
||||
2912 3463552
|
||||
3136 3290016
|
||||
3360 3135272
|
||||
3584 2993848
|
@ -1,33 +1,33 @@
|
||||
920 374785
|
||||
1142 242737
|
||||
1371 176704
|
||||
1596 134341
|
||||
1816 105537
|
||||
2044 85089
|
||||
2268 70051
|
||||
2490 58671
|
||||
2716 49851
|
||||
2937 42881
|
||||
3162 37288
|
||||
3387 32697
|
||||
3608 28915
|
||||
3836 25759
|
||||
4057 23088
|
||||
4284 20800
|
||||
4508 18827
|
||||
4730 17164
|
||||
4956 15689
|
||||
5180 14397
|
||||
5398 13260
|
||||
5628 12249
|
||||
5852 11346
|
||||
6071 10537
|
||||
6298 9812
|
||||
6522 9161
|
||||
6742 8572
|
||||
6971 8038
|
||||
7195 2915
|
||||
7419 2744
|
||||
7644 2587
|
||||
7866 2444
|
||||
8090 2311
|
||||
923 45612
|
||||
1143 68010
|
||||
1370 94894
|
||||
1596 126514
|
||||
1820 163014
|
||||
2044 203564
|
||||
2268 249156
|
||||
2492 299226
|
||||
2716 354138
|
||||
2940 413022
|
||||
3163 477406
|
||||
3387 545876
|
||||
3612 619044
|
||||
3835 696754
|
||||
4060 779174
|
||||
4284 866216
|
||||
4508 958100
|
||||
4731 1055898
|
||||
4954 1162294
|
||||
5179 1267654
|
||||
5404 1377572
|
||||
5628 1503736
|
||||
5852 1622310
|
||||
6076 1746624
|
||||
6299 1875390
|
||||
6524 2009086
|
||||
6748 2145990
|
||||
6971 2289044
|
||||
7196 2891644
|
||||
7418 3064792
|
||||
7644 3249780
|
||||
7868 3455868
|
||||
8092 3644238
|
||||
|
BIN
logs/mult.png
Before Width: | Height: | Size: 7.9 KiB After Width: | Height: | Size: 6.6 KiB |
@ -1,33 +1,33 @@
|
||||
924 374171
|
||||
1147 243163
|
||||
1371 177111
|
||||
1596 134465
|
||||
1819 105619
|
||||
2044 85145
|
||||
2266 70086
|
||||
2488 58717
|
||||
2715 49869
|
||||
2939 42894
|
||||
3164 37389
|
||||
3387 33510
|
||||
3610 29993
|
||||
3836 27205
|
||||
4060 24751
|
||||
4281 22576
|
||||
4508 20670
|
||||
4732 19019
|
||||
4954 17527
|
||||
5180 16217
|
||||
5404 15044
|
||||
5624 14003
|
||||
5849 13051
|
||||
6076 12067
|
||||
6300 11438
|
||||
6524 10772
|
||||
6748 10298
|
||||
6972 9715
|
||||
7195 9330
|
||||
7416 8836
|
||||
7644 8465
|
||||
7864 8042
|
||||
8091 7735
|
||||
921 92388
|
||||
1148 61410
|
||||
1372 43799
|
||||
1594 33047
|
||||
1819 26913
|
||||
2043 21996
|
||||
2268 18453
|
||||
2492 15623
|
||||
2715 13378
|
||||
2940 11626
|
||||
3164 10252
|
||||
3385 9291
|
||||
3610 8348
|
||||
3835 7615
|
||||
4060 6928
|
||||
4283 6401
|
||||
4508 5836
|
||||
4732 5387
|
||||
4955 4985
|
||||
5178 4614
|
||||
5404 4300
|
||||
5622 4005
|
||||
5852 3742
|
||||
6073 3502
|
||||
6298 3262
|
||||
6524 3137
|
||||
6748 2967
|
||||
6971 2807
|
||||
7195 2679
|
||||
7420 2571
|
||||
7643 2442
|
||||
7867 2324
|
||||
8091 2235
|
||||
|
@ -1,13 +0,0 @@
|
||||
To use the pretty graphs you have to first build/run the ltmtest from the root directory of the package.
|
||||
To do this, type
|
||||
|
||||
make timing ; ltmtest
|
||||
|
||||
in the root. It will run for a while [about ten minutes on most PCs] and produce a series of .log files in logs/.
|
||||
|
||||
After doing that, run "gnuplot graphs.dem" to make the PNGs. If you have managed to do all of that so far, just open index.html to view
|
||||
them all :-)
|
||||
|
||||
Have fun
|
||||
|
||||
Tom
|
@ -1,16 +0,0 @@
|
||||
224 8113248
|
||||
448 6585584
|
||||
672 5687678
|
||||
896 4761144
|
||||
1120 4111592
|
||||
1344 3995154
|
||||
1568 3532387
|
||||
1792 3225400
|
||||
2016 2963960
|
||||
2240 2720112
|
||||
2464 2533952
|
||||
2688 2307168
|
||||
2912 2287064
|
||||
3136 2150160
|
||||
3360 2035992
|
||||
3584 1936304
|
Before Width: | Height: | Size: 6.7 KiB |
@ -1,7 +0,0 @@
|
||||
513 195
|
||||
769 68
|
||||
1025 31
|
||||
2049 4
|
||||
2561 2
|
||||
3073 1
|
||||
4097 0
|
BIN
logs/p4/expt.png
Before Width: | Height: | Size: 6.4 KiB |
@ -1,7 +0,0 @@
|
||||
532 393
|
||||
784 158
|
||||
1036 79
|
||||
1540 27
|
||||
2072 12
|
||||
3080 4
|
||||
4116 1
|
@ -1,17 +0,0 @@
|
||||
set terminal png color
|
||||
set size 1.75
|
||||
set ylabel "Operations per Second"
|
||||
set xlabel "Operand size (bits)"
|
||||
|
||||
set output "addsub.png"
|
||||
plot 'add.log' smooth bezier title "Addition", 'sub.log' smooth bezier title "Subtraction"
|
||||
|
||||
set output "mult.png"
|
||||
plot 'sqr.log' smooth bezier title "Squaring (without Karatsuba)", 'sqr_kara.log' smooth bezier title "Squaring (Karatsuba)", 'mult.log' smooth bezier title "Multiplication (without Karatsuba)", 'mult_kara.log' smooth bezier title "Multiplication (Karatsuba)"
|
||||
|
||||
set output "expt.png"
|
||||
plot 'expt.log' smooth bezier title "Exptmod (Montgomery)", 'expt_dr.log' smooth bezier title "Exptmod (Dimminished Radix)"
|
||||
|
||||
set output "invmod.png"
|
||||
plot 'invmod.log' smooth bezier title "Modular Inverse"
|
||||
|
@ -1,24 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>LibTomMath Log Plots</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Addition and Subtraction</h1>
|
||||
<center><img src=addsub.png></center>
|
||||
<hr>
|
||||
|
||||
<h1>Multipliers</h1>
|
||||
<center><img src=mult.png></center>
|
||||
<hr>
|
||||
|
||||
<h1>Exptmod</h1>
|
||||
<center><img src=expt.png></center>
|
||||
<hr>
|
||||
|
||||
<h1>Modular Inverse</h1>
|
||||
<center><img src=invmod.png></center>
|
||||
<hr>
|
||||
|
||||
</body>
|
||||
</html>
|
@ -1,32 +0,0 @@
|
||||
112 13608
|
||||
224 6872
|
||||
336 4264
|
||||
448 2792
|
||||
560 2144
|
||||
672 1560
|
||||
784 1296
|
||||
896 1672
|
||||
1008 896
|
||||
1120 736
|
||||
1232 1024
|
||||
1344 888
|
||||
1456 472
|
||||
1568 680
|
||||
1680 373
|
||||
1792 328
|
||||
1904 484
|
||||
2016 436
|
||||
2128 232
|
||||
2240 211
|
||||
2352 200
|
||||
2464 177
|
||||
2576 293
|
||||
2688 262
|
||||
2800 251
|
||||
2912 137
|
||||
3024 216
|
||||
3136 117
|
||||
3248 113
|
||||
3360 181
|
||||
3472 98
|
||||
3584 158
|
Before Width: | Height: | Size: 5.5 KiB |
@ -1,17 +0,0 @@
|
||||
896 77600
|
||||
1344 35776
|
||||
1792 19688
|
||||
2240 13248
|
||||
2688 9424
|
||||
3136 7056
|
||||
3584 5464
|
||||
4032 4368
|
||||
4480 3568
|
||||
4928 2976
|
||||
5376 2520
|
||||
5824 2152
|
||||
6272 1872
|
||||
6720 1632
|
||||
7168 650
|
||||
7616 576
|
||||
8064 515
|
BIN
logs/p4/mult.png
Before Width: | Height: | Size: 7.9 KiB |
@ -1,17 +0,0 @@
|
||||
896 77752
|
||||
1344 35832
|
||||
1792 19688
|
||||
2240 14704
|
||||
2688 10832
|
||||
3136 8336
|
||||
3584 6600
|
||||
4032 5424
|
||||
4480 4648
|
||||
4928 3976
|
||||
5376 3448
|
||||
5824 3016
|
||||
6272 2664
|
||||
6720 2384
|
||||
7168 2120
|
||||
7616 1912
|
||||
8064 1752
|
@ -1,17 +0,0 @@
|
||||
896 128088
|
||||
1344 63640
|
||||
1792 37968
|
||||
2240 25488
|
||||
2688 18176
|
||||
3136 13672
|
||||
3584 4920
|
||||
4032 3912
|
||||
4480 3160
|
||||
4928 2616
|
||||
5376 2216
|
||||
5824 1896
|
||||
6272 1624
|
||||
6720 1408
|
||||
7168 1240
|
||||
7616 1096
|
||||
8064 984
|
@ -1,17 +0,0 @@
|
||||
896 127456
|
||||
1344 63752
|
||||
1792 37920
|
||||
2240 25440
|
||||
2688 18200
|
||||
3136 13728
|
||||
3584 10968
|
||||
4032 9072
|
||||
4480 7608
|
||||
4928 6440
|
||||
5376 5528
|
||||
5824 4768
|
||||
6272 4328
|
||||
6720 3888
|
||||
7168 3504
|
||||
7616 3176
|
||||
8064 2896
|
@ -1,16 +0,0 @@
|
||||
224 7355896
|
||||
448 6162880
|
||||
672 5218984
|
||||
896 4622776
|
||||
1120 3999320
|
||||
1344 3629480
|
||||
1568 3290384
|
||||
1792 2954752
|
||||
2016 2737056
|
||||
2240 2563320
|
||||
2464 2451928
|
||||
2688 2310920
|
||||
2912 2139048
|
||||
3136 2034080
|
||||
3360 1890800
|
||||
3584 1808624
|
66
logs/sqr.log
@ -1,33 +1,33 @@
|
||||
922 471095
|
||||
1147 337137
|
||||
1366 254327
|
||||
1596 199732
|
||||
1819 161225
|
||||
2044 132852
|
||||
2268 111493
|
||||
2490 94864
|
||||
2715 81745
|
||||
2940 71187
|
||||
3162 62575
|
||||
3387 55418
|
||||
3612 14540
|
||||
3836 12944
|
||||
4060 11627
|
||||
4281 10546
|
||||
4508 9502
|
||||
4730 8688
|
||||
4954 7937
|
||||
5180 7273
|
||||
5402 6701
|
||||
5627 6189
|
||||
5850 5733
|
||||
6076 5310
|
||||
6300 4933
|
||||
6522 4631
|
||||
6748 4313
|
||||
6971 4064
|
||||
7196 3801
|
||||
7420 3576
|
||||
7642 3388
|
||||
7868 3191
|
||||
8092 3020
|
||||
924 26026
|
||||
1146 37682
|
||||
1370 51714
|
||||
1595 68130
|
||||
1820 86850
|
||||
2043 107880
|
||||
2267 131236
|
||||
2490 156828
|
||||
2716 184704
|
||||
2940 214934
|
||||
3162 247424
|
||||
3388 282494
|
||||
3608 308390
|
||||
3834 345978
|
||||
4060 386156
|
||||
4282 427648
|
||||
4505 471556
|
||||
4731 517948
|
||||
4954 566396
|
||||
5180 618292
|
||||
5402 670130
|
||||
5628 725674
|
||||
5852 783310
|
||||
6076 843480
|
||||
6300 905136
|
||||
6524 969132
|
||||
6748 1033680
|
||||
6971 1100912
|
||||
7195 1170954
|
||||
7420 1252576
|
||||
7643 1325038
|
||||
7867 1413890
|
||||
8091 1493140
|
||||
|
@ -1,33 +1,33 @@
|
||||
922 470930
|
||||
1148 337217
|
||||
1372 254433
|
||||
1596 199827
|
||||
1820 161204
|
||||
2043 132871
|
||||
2267 111522
|
||||
2488 94932
|
||||
2714 81814
|
||||
2939 71231
|
||||
3164 62616
|
||||
3385 55467
|
||||
3611 44426
|
||||
3836 40695
|
||||
4060 37391
|
||||
4283 34371
|
||||
4508 31779
|
||||
4732 29499
|
||||
4956 27426
|
||||
5177 25598
|
||||
5403 23944
|
||||
5628 22416
|
||||
5851 21052
|
||||
6076 19781
|
||||
6299 18588
|
||||
6523 17539
|
||||
6746 16618
|
||||
6972 15705
|
||||
7196 13582
|
||||
7420 13004
|
||||
7643 12496
|
||||
7868 11963
|
||||
8092 11497
|
||||
923 165854
|
||||
1146 112539
|
||||
1372 80388
|
||||
1595 60051
|
||||
1820 47498
|
||||
2044 38017
|
||||
2268 31935
|
||||
2492 27373
|
||||
2714 23798
|
||||
2939 20630
|
||||
3164 18198
|
||||
3388 16191
|
||||
3612 14538
|
||||
3836 13038
|
||||
4058 11683
|
||||
4284 10915
|
||||
4508 9998
|
||||
4731 9271
|
||||
4954 8555
|
||||
5180 7910
|
||||
5404 7383
|
||||
5628 7012
|
||||
5852 6527
|
||||
6075 6175
|
||||
6299 5737
|
||||
6524 5398
|
||||
6744 5110
|
||||
6971 4864
|
||||
7196 4567
|
||||
7420 4371
|
||||
7644 4182
|
||||
7868 3981
|
||||
8092 3758
|
||||
|
32
logs/sub.log
@ -1,16 +1,16 @@
|
||||
224 16370431
|
||||
448 13327848
|
||||
672 11009401
|
||||
896 9125342
|
||||
1120 7930419
|
||||
1344 7114040
|
||||
1568 6506998
|
||||
1792 5899346
|
||||
2016 5435327
|
||||
2240 5038931
|
||||
2464 4696364
|
||||
2688 4425678
|
||||
2912 4134476
|
||||
3136 3913280
|
||||
3360 3692536
|
||||
3584 3505219
|
||||
224 2012
|
||||
448 2208
|
||||
672 2366
|
||||
896 2532
|
||||
1120 2682
|
||||
1344 2838
|
||||
1568 3016
|
||||
1792 3146
|
||||
2016 3318
|
||||
2240 3538
|
||||
2464 3756
|
||||
2688 3914
|
||||
2912 4060
|
||||
3136 4216
|
||||
3360 4392
|
||||
3584 4550
|
||||
|
40
makefile
@ -12,7 +12,10 @@ CFLAGS += -O3 -funroll-loops
|
||||
#x86 optimizations [should be valid for any GCC install though]
|
||||
CFLAGS += -fomit-frame-pointer
|
||||
|
||||
VERSION=0.30
|
||||
#debug
|
||||
#CFLAGS += -g3
|
||||
|
||||
VERSION=0.31
|
||||
|
||||
default: libtommath.a
|
||||
|
||||
@ -20,7 +23,7 @@ default: libtommath.a
|
||||
LIBNAME=libtommath.a
|
||||
HEADERS=tommath.h
|
||||
|
||||
#LIBPATH-The directory for libtomcrypt to be installed to.
|
||||
#LIBPATH-The directory for libtommath to be installed to.
|
||||
#INCPATH-The directory to install the header files for libtommath.
|
||||
#DATAPATH-The directory to install the pdf docs.
|
||||
DESTDIR=
|
||||
@ -58,6 +61,30 @@ libtommath.a: $(OBJECTS)
|
||||
$(AR) $(ARFLAGS) libtommath.a $(OBJECTS)
|
||||
ranlib libtommath.a
|
||||
|
||||
|
||||
#make a profiled library (takes a while!!!)
|
||||
#
|
||||
# This will build the library with profile generation
|
||||
# then run the test demo and rebuild the library.
|
||||
#
|
||||
# So far I've seen improvements in the MP math
|
||||
profiled:
|
||||
make CFLAGS="$(CFLAGS) -fprofile-arcs -DTESTING" timing
|
||||
./ltmtest
|
||||
rm -f *.a *.o ltmtest
|
||||
make CFLAGS="$(CFLAGS) -fbranch-probabilities"
|
||||
|
||||
#make a single object profiled library
|
||||
profiled_single:
|
||||
perl gen.pl
|
||||
$(CC) $(CFLAGS) -fprofile-arcs -DTESTING -c mpi.c -o mpi.o
|
||||
$(CC) $(CFLAGS) -DTESTING -DTIMER demo/timing.c mpi.o -o ltmtest
|
||||
./ltmtest
|
||||
rm -f *.o ltmtest
|
||||
$(CC) $(CFLAGS) -fbranch-probabilities -DTESTING -c mpi.c -o mpi.o
|
||||
$(AR) $(ARFLAGS) libtommath.a mpi.o
|
||||
ranlib libtommath.a
|
||||
|
||||
install: libtommath.a
|
||||
install -d -g root -o root $(DESTDIR)$(LIBPATH)
|
||||
install -d -g root -o root $(DESTDIR)$(INCPATH)
|
||||
@ -71,7 +98,7 @@ mtest: test
|
||||
cd mtest ; $(CC) $(CFLAGS) mtest.c -o mtest -s
|
||||
|
||||
timing: libtommath.a
|
||||
$(CC) $(CFLAGS) -DTIMER demo/demo.c libtommath.a -o ltmtest -s
|
||||
$(CC) $(CFLAGS) -DTIMER demo/timing.c libtommath.a -o ltmtest -s
|
||||
|
||||
# makes the LTM book DVI file, requires tetex, perl and makeindex [part of tetex I think]
|
||||
docdvi: tommath.src
|
||||
@ -106,10 +133,13 @@ mandvi: bn.tex
|
||||
manual: mandvi
|
||||
pdflatex bn >/dev/null
|
||||
rm -f bn.aux bn.dvi bn.log bn.idx bn.lof bn.out bn.toc
|
||||
|
||||
|
||||
pretty:
|
||||
perl pretty.build
|
||||
|
||||
clean:
|
||||
rm -f *.bat *.pdf *.o *.a *.obj *.lib *.exe *.dll etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \
|
||||
*.idx *.toc *.log *.aux *.dvi *.lof *.ind *.ilg *.ps *.log *.s mpi.c
|
||||
*.idx *.toc *.log *.aux *.dvi *.lof *.ind *.ilg *.ps *.log *.s mpi.c *.da *.dyn *.dpi tommath.tex *~ demo/*~ etc/*~
|
||||
cd etc ; make clean
|
||||
cd pics ; make clean
|
||||
|
||||
|
@ -30,7 +30,8 @@ bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj \
|
||||
bn_mp_radix_smap.obj bn_mp_read_radix.obj bn_mp_toradix.obj bn_mp_radix_size.obj \
|
||||
bn_mp_fread.obj bn_mp_fwrite.obj bn_mp_cnt_lsb.obj bn_error.obj \
|
||||
bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_prime_sizes_tab.obj bn_mp_exteuclid.obj bn_mp_toradix_n.obj \
|
||||
bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj
|
||||
bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj \
|
||||
bn_mp_init_set.obj bn_mp_init_set_int.obj
|
||||
|
||||
TARGET = libtommath.lib
|
||||
|
||||
|
@ -35,7 +35,8 @@ bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \
|
||||
bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \
|
||||
bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \
|
||||
bn_mp_init_multi.o bn_mp_clear_multi.o bn_prime_sizes_tab.o bn_mp_exteuclid.o bn_mp_toradix_n.o \
|
||||
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o
|
||||
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \
|
||||
bn_mp_init_set_int.o
|
||||
|
||||
# make a Windows DLL via Cygwin
|
||||
windll: $(OBJECTS)
|
||||
|
110
makefile.icc
Normal file
@ -0,0 +1,110 @@
|
||||
#Makefile for ICC
|
||||
#
|
||||
#Tom St Denis
|
||||
CC=icc
|
||||
|
||||
CFLAGS += -I./
|
||||
|
||||
# optimize for SPEED
|
||||
#
|
||||
# -mcpu= can be pentium, pentiumpro (covers PII through PIII) or pentium4
|
||||
# -ax? specifies make code specifically for ? but compatible with IA-32
|
||||
# -x? specifies compile solely for ? [not specifically IA-32 compatible]
|
||||
#
|
||||
# where ? is
|
||||
# K - PIII
|
||||
#   W - first P4 [Willamette]
|
||||
# N - P4 Northwood
|
||||
# P - P4 Prescott
|
||||
# B - Blend of P4 and PM [mobile]
|
||||
#
|
||||
# Default to just generic max opts
|
||||
CFLAGS += -O3 -xN
|
||||
|
||||
default: libtommath.a
|
||||
|
||||
#default files to install
|
||||
LIBNAME=libtommath.a
|
||||
HEADERS=tommath.h
|
||||
|
||||
#LIBPATH-The directory for libtommath to be installed to.
|
||||
#INCPATH-The directory to install the header files for libtommath.
|
||||
#DATAPATH-The directory to install the pdf docs.
|
||||
DESTDIR=
|
||||
LIBPATH=/usr/lib
|
||||
INCPATH=/usr/include
|
||||
DATAPATH=/usr/share/doc/libtommath/pdf
|
||||
|
||||
OBJECTS=bncore.o bn_mp_init.o bn_mp_clear.o bn_mp_exch.o bn_mp_grow.o bn_mp_shrink.o \
|
||||
bn_mp_clamp.o bn_mp_zero.o bn_mp_set.o bn_mp_set_int.o bn_mp_init_size.o bn_mp_copy.o \
|
||||
bn_mp_init_copy.o bn_mp_abs.o bn_mp_neg.o bn_mp_cmp_mag.o bn_mp_cmp.o bn_mp_cmp_d.o \
|
||||
bn_mp_rshd.o bn_mp_lshd.o bn_mp_mod_2d.o bn_mp_div_2d.o bn_mp_mul_2d.o bn_mp_div_2.o \
|
||||
bn_mp_mul_2.o bn_s_mp_add.o bn_s_mp_sub.o bn_fast_s_mp_mul_digs.o bn_s_mp_mul_digs.o \
|
||||
bn_fast_s_mp_mul_high_digs.o bn_s_mp_mul_high_digs.o bn_fast_s_mp_sqr.o bn_s_mp_sqr.o \
|
||||
bn_mp_add.o bn_mp_sub.o bn_mp_karatsuba_mul.o bn_mp_mul.o bn_mp_karatsuba_sqr.o \
|
||||
bn_mp_sqr.o bn_mp_div.o bn_mp_mod.o bn_mp_add_d.o bn_mp_sub_d.o bn_mp_mul_d.o \
|
||||
bn_mp_div_d.o bn_mp_mod_d.o bn_mp_expt_d.o bn_mp_addmod.o bn_mp_submod.o \
|
||||
bn_mp_mulmod.o bn_mp_sqrmod.o bn_mp_gcd.o bn_mp_lcm.o bn_fast_mp_invmod.o bn_mp_invmod.o \
|
||||
bn_mp_reduce.o bn_mp_montgomery_setup.o bn_fast_mp_montgomery_reduce.o bn_mp_montgomery_reduce.o \
|
||||
bn_mp_exptmod_fast.o bn_mp_exptmod.o bn_mp_2expt.o bn_mp_n_root.o bn_mp_jacobi.o bn_reverse.o \
|
||||
bn_mp_count_bits.o bn_mp_read_unsigned_bin.o bn_mp_read_signed_bin.o bn_mp_to_unsigned_bin.o \
|
||||
bn_mp_to_signed_bin.o bn_mp_unsigned_bin_size.o bn_mp_signed_bin_size.o \
|
||||
bn_mp_xor.o bn_mp_and.o bn_mp_or.o bn_mp_rand.o bn_mp_montgomery_calc_normalization.o \
|
||||
bn_mp_prime_is_divisible.o bn_prime_tab.o bn_mp_prime_fermat.o bn_mp_prime_miller_rabin.o \
|
||||
bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o \
|
||||
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o bn_mp_reduce_setup.o \
|
||||
bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \
|
||||
bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \
|
||||
bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \
|
||||
bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \
|
||||
bn_mp_init_multi.o bn_mp_clear_multi.o bn_prime_sizes_tab.o bn_mp_exteuclid.o bn_mp_toradix_n.o \
|
||||
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \
|
||||
bn_mp_init_set_int.o
|
||||
|
||||
libtommath.a: $(OBJECTS)
|
||||
$(AR) $(ARFLAGS) libtommath.a $(OBJECTS)
|
||||
ranlib libtommath.a
|
||||
|
||||
#make a profiled library (takes a while!!!)
|
||||
#
|
||||
# This will build the library with profile generation
|
||||
# then run the test demo and rebuild the library.
|
||||
#
|
||||
# So far I've seen improvements in the MP math
|
||||
profiled:
|
||||
make -f makefile.icc CFLAGS="$(CFLAGS) -prof_gen -DTESTING" timing
|
||||
./ltmtest
|
||||
rm -f *.a *.o ltmtest
|
||||
make -f makefile.icc CFLAGS="$(CFLAGS) -prof_use"
|
||||
|
||||
#make a single object profiled library
|
||||
profiled_single:
|
||||
perl gen.pl
|
||||
$(CC) $(CFLAGS) -prof_gen -DTESTING -c mpi.c -o mpi.o
|
||||
$(CC) $(CFLAGS) -DTESTING -DTIMER demo/demo.c mpi.o -o ltmtest
|
||||
./ltmtest
|
||||
rm -f *.o ltmtest
|
||||
$(CC) $(CFLAGS) -prof_use -ip -DTESTING -c mpi.c -o mpi.o
|
||||
$(AR) $(ARFLAGS) libtommath.a mpi.o
|
||||
ranlib libtommath.a
|
||||
|
||||
install: libtommath.a
|
||||
install -d -g root -o root $(DESTDIR)$(LIBPATH)
|
||||
install -d -g root -o root $(DESTDIR)$(INCPATH)
|
||||
install -g root -o root $(LIBNAME) $(DESTDIR)$(LIBPATH)
|
||||
install -g root -o root $(HEADERS) $(DESTDIR)$(INCPATH)
|
||||
|
||||
test: libtommath.a demo/demo.o
|
||||
$(CC) demo/demo.o libtommath.a -o test
|
||||
|
||||
mtest: test
|
||||
cd mtest ; $(CC) $(CFLAGS) mtest.c -o mtest
|
||||
|
||||
timing: libtommath.a
|
||||
$(CC) $(CFLAGS) -DTIMER demo/timing.c libtommath.a -o ltmtest
|
||||
|
||||
clean:
|
||||
rm -f *.bat *.pdf *.o *.a *.obj *.lib *.exe *.dll etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \
|
||||
*.idx *.toc *.log *.aux *.dvi *.lof *.ind *.ilg *.ps *.log *.s mpi.c *.il etc/*.il *.dyn
|
||||
cd etc ; make clean
|
||||
cd pics ; make clean
|
@ -29,7 +29,8 @@ bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj \
|
||||
bn_mp_radix_smap.obj bn_mp_read_radix.obj bn_mp_toradix.obj bn_mp_radix_size.obj \
|
||||
bn_mp_fread.obj bn_mp_fwrite.obj bn_mp_cnt_lsb.obj bn_error.obj \
|
||||
bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_prime_sizes_tab.obj bn_mp_exteuclid.obj bn_mp_toradix_n.obj \
|
||||
bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj
|
||||
bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj \
|
||||
bn_mp_init_set.obj bn_mp_init_set_int.obj
|
||||
|
||||
library: $(OBJECTS)
|
||||
lib /out:tommath.lib $(OBJECTS)
|
||||
|
BIN
poster.pdf
@ -452,7 +452,7 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
|
||||
}
|
||||
|
||||
/* setup dest */
|
||||
olduse = c->used;
|
||||
olduse = c->used;
|
||||
c->used = digs;
|
||||
|
||||
{
|
||||
@ -779,7 +779,7 @@ mp_2expt (mp_int * a, int b)
|
||||
a->used = b / DIGIT_BIT + 1;
|
||||
|
||||
/* put the single bit in its place */
|
||||
a->dp[b / DIGIT_BIT] = 1 << (b % DIGIT_BIT);
|
||||
a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
|
||||
|
||||
return MP_OKAY;
|
||||
}
|
||||
@ -1142,10 +1142,14 @@ mp_clamp (mp_int * a)
|
||||
void
|
||||
mp_clear (mp_int * a)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* only do anything if a hasn't been freed previously */
|
||||
if (a->dp != NULL) {
|
||||
/* first zero the digits */
|
||||
memset (a->dp, 0, sizeof (mp_digit) * a->used);
|
||||
for (i = 0; i < a->used; i++) {
|
||||
a->dp[i] = 0;
|
||||
}
|
||||
|
||||
/* free ram */
|
||||
XFREE(a->dp);
|
||||
@ -1677,7 +1681,7 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
|
||||
*/
|
||||
|
||||
/* get sign before writing to c */
|
||||
x.sign = a->sign;
|
||||
x.sign = x.used == 0 ? MP_ZPOS : a->sign;
|
||||
|
||||
if (c != NULL) {
|
||||
mp_clamp (&q);
|
||||
@ -3083,15 +3087,22 @@ int mp_grow (mp_int * a, int size)
|
||||
*/
|
||||
#include <tommath.h>
|
||||
|
||||
/* init a new bigint */
|
||||
/* init a new mp_int */
|
||||
int mp_init (mp_int * a)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* allocate memory required and clear it */
|
||||
a->dp = OPT_CAST(mp_digit) XCALLOC (sizeof (mp_digit), MP_PREC);
|
||||
a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
|
||||
if (a->dp == NULL) {
|
||||
return MP_MEM;
|
||||
}
|
||||
|
||||
/* set the digits to zero */
|
||||
for (i = 0; i < MP_PREC; i++) {
|
||||
a->dp[i] = 0;
|
||||
}
|
||||
|
||||
/* set the used to zero, allocated digits to the default precision
|
||||
* and sign to positive */
|
||||
a->used = 0;
|
||||
@ -3753,9 +3764,6 @@ int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
|
||||
goto X0Y0;
|
||||
|
||||
/* now shift the digits */
|
||||
x0.sign = x1.sign = a->sign;
|
||||
y0.sign = y1.sign = b->sign;
|
||||
|
||||
x0.used = y0.used = B;
|
||||
x1.used = a->used - B;
|
||||
y1.used = b->used - B;
|
||||
@ -4484,7 +4492,7 @@ int mp_mul (mp_int * a, mp_int * b, mp_int * c)
|
||||
res = s_mp_mul (a, b, c);
|
||||
}
|
||||
}
|
||||
c->sign = neg;
|
||||
c->sign = (c->used > 0) ? neg : MP_ZPOS;
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -6090,7 +6098,8 @@ mp_reduce_2k_setup(mp_int *a, mp_digit *d)
|
||||
/* determines if mp_reduce_2k can be used */
|
||||
int mp_reduce_is_2k(mp_int *a)
|
||||
{
|
||||
int ix, iy, iz, iw;
|
||||
int ix, iy, iw;
|
||||
mp_digit iz;
|
||||
|
||||
if (a->used == 0) {
|
||||
return 0;
|
||||
@ -6107,7 +6116,7 @@ int mp_reduce_is_2k(mp_int *a)
|
||||
return 0;
|
||||
}
|
||||
iz <<= 1;
|
||||
if (iz > (int)MP_MASK) {
|
||||
if (iz > (mp_digit)MP_MASK) {
|
||||
++iw;
|
||||
iz = 1;
|
||||
}
|
||||
@ -8396,14 +8405,16 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
|
||||
|
||||
CPU /Compiler /MUL CUTOFF/SQR CUTOFF
|
||||
-------------------------------------------------------------
|
||||
Intel P4 /GCC v3.2 / 70/ 108
|
||||
AMD Athlon XP /GCC v3.2 / 109/ 127
|
||||
|
||||
Intel P4 Northwood /GCC v3.3.3 / 59/ 81/profiled build
|
||||
Intel P4 Northwood /GCC v3.3.3 / 59/ 80/profiled_single build
|
||||
Intel P4 Northwood /ICC v8.0 / 57/ 70/profiled build
|
||||
Intel P4 Northwood /ICC v8.0 / 54/ 76/profiled_single build
|
||||
AMD Athlon XP /GCC v3.2 / 109/ 127/
|
||||
|
||||
*/
|
||||
|
||||
/* configured for a AMD XP Thoroughbred core with etc/tune.c */
|
||||
int KARATSUBA_MUL_CUTOFF = 109, /* Min. number of digits before Karatsuba multiplication is used. */
|
||||
KARATSUBA_SQR_CUTOFF = 127, /* Min. number of digits before Karatsuba squaring is used. */
|
||||
int KARATSUBA_MUL_CUTOFF = 57, /* Min. number of digits before Karatsuba multiplication is used. */
|
||||
KARATSUBA_SQR_CUTOFF = 70, /* Min. number of digits before Karatsuba squaring is used. */
|
||||
|
||||
TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */
|
||||
TOOM_SQR_CUTOFF = 400;
|
||||
|
66
pretty.build
Normal file
@ -0,0 +1,66 @@
|
||||
#!/bin/perl -w
|
||||
#
|
||||
# Cute little builder for perl
|
||||
# Total waste of development time...
|
||||
#
|
||||
# This will build all the object files and then the archive .a file
|
||||
# requires GCC, GNU make and a sense of humour.
|
||||
#
|
||||
# Tom St Denis
|
||||
use strict;
|
||||
|
||||
my $count = 0;
|
||||
my $starttime = time;
|
||||
my $rate = 0;
|
||||
print "Scanning for source files...\n";
|
||||
foreach my $filename (glob "*.c") {
|
||||
++$count;
|
||||
}
|
||||
print "Source files to build: $count\nBuilding...\n";
|
||||
my $i = 0;
|
||||
my $lines = 0;
|
||||
my $filesbuilt = 0;
|
||||
foreach my $filename (glob "*.c") {
|
||||
printf("Building %3.2f%%, ", (++$i/$count)*100.0);
|
||||
if ($i % 4 == 0) { print "/, "; }
|
||||
if ($i % 4 == 1) { print "-, "; }
|
||||
if ($i % 4 == 2) { print "\\, "; }
|
||||
if ($i % 4 == 3) { print "|, "; }
|
||||
if ($rate > 0) {
|
||||
my $tleft = ($count - $i) / $rate;
|
||||
my $tsec = $tleft%60;
|
||||
my $tmin = ($tleft/60)%60;
|
||||
my $thour = ($tleft/3600)%60;
|
||||
printf("%2d:%02d:%02d left, ", $thour, $tmin, $tsec);
|
||||
}
|
||||
my $cnt = ($i/$count)*30.0;
|
||||
my $x = 0;
|
||||
print "[";
|
||||
for (; $x < $cnt; $x++) { print "#"; }
|
||||
for (; $x < 30; $x++) { print " "; }
|
||||
print "]\r";
|
||||
my $tmp = $filename;
|
||||
$tmp =~ s/\.c/".o"/ge;
|
||||
if (open(SRC, "<$tmp")) {
|
||||
close SRC;
|
||||
} else {
|
||||
!system("make $tmp > /dev/null 2>/dev/null") or die "\nERROR: Failed to make $tmp!!!\n";
|
||||
open( SRC, "<$filename" ) or die "Couldn't open $filename for reading: $!";
|
||||
++$lines while (<SRC>);
|
||||
close SRC or die "Error closing $filename after reading: $!";
|
||||
++$filesbuilt;
|
||||
}
|
||||
|
||||
# update timer
|
||||
if (time != $starttime) {
|
||||
my $delay = time - $starttime;
|
||||
$rate = $i/$delay;
|
||||
}
|
||||
}
|
||||
|
||||
# finish building the library
|
||||
printf("\nFinished building source (%d seconds, %3.2f files per second).\n", time - $starttime, $rate);
|
||||
print "Compiled approximately $filesbuilt files and $lines lines of code.\n";
|
||||
print "Doing final make (building archive...)\n";
|
||||
!system("make > /dev/null 2>/dev/null") or die "\nERROR: Failed to perform last make command!!!\n";
|
||||
print "done.\n";
|
BIN
tommath.pdf
163
tommath.src
@ -258,7 +258,7 @@ floating point is meant to be implemented in hardware the precision of the manti
|
||||
a mantissa of much larger precision than hardware alone can efficiently support. This approach could be useful where
|
||||
scientific applications must minimize the total output error over long calculations.
|
||||
|
||||
Another use for large integers is within arithmetic on polynomials of large characteristic (i.e. $GF(p)[x]$ for large $p$).
|
||||
Yet another use for large integers is within arithmetic on polynomials of large characteristic (i.e. $GF(p)[x]$ for large $p$).
|
||||
In fact the library discussed within this text has already been used to form a polynomial basis library\footnote{See \url{http://poly.libtomcrypt.org} for more details.}.
|
||||
|
||||
\subsection{Benefits of Multiple Precision Arithmetic}
|
||||
@ -316,7 +316,7 @@ the reader how the algorithms fit together as well as where to start on various
|
||||
|
||||
\section{Discussion and Notation}
|
||||
\subsection{Notation}
|
||||
A multiple precision integer of $n$-digits shall be denoted as $x = (x_{n-1} ... x_1 x_0)_{ \beta }$ and represent
|
||||
A multiple precision integer of $n$-digits shall be denoted as $x = (x_{n-1}, \ldots, x_1, x_0)_{ \beta }$ and represent
|
||||
the integer $x \equiv \sum_{i=0}^{n-1} x_i\beta^i$. The elements of the array $x$ are said to be the radix $\beta$ digits
|
||||
of the integer. For example, $x = (1,2,3)_{10}$ would represent the integer
|
||||
$1\cdot 10^2 + 2\cdot10^1 + 3\cdot10^0 = 123$.
|
||||
@ -339,12 +339,11 @@ algorithms will be used to establish the relevant theory which will subsequently
|
||||
precision algorithm to solve the same problem.
|
||||
|
||||
\subsection{Precision Notation}
|
||||
For the purposes of this text a single precision variable must be able to represent integers in the range
|
||||
$0 \le x < q \beta$ while a double precision variable must be able to represent integers in the range
|
||||
$0 \le x < q \beta^2$. The variable $\beta$ represents the radix of a single digit of a multiple precision integer and
|
||||
must be of the form $q^p$ for $q, p \in \Z^+$. The extra radix-$q$ factor allows additions and subtractions to proceed
|
||||
without truncation of the carry. Since all modern computers are binary, it is assumed that $q$ is two, for all intents
|
||||
and purposes.
|
||||
The variable $\beta$ represents the radix of a single digit of a multiple precision integer and
|
||||
must be of the form $q^p$ for $q, p \in \Z^+$. A single precision variable must be able to represent integers in
|
||||
the range $0 \le x < q \beta$ while a double precision variable must be able to represent integers in the range
|
||||
$0 \le x < q \beta^2$. The extra radix-$q$ factor allows additions and subtractions to proceed without truncation of the
|
||||
carry. Since all modern computers are binary, it is assumed that $q$ is two.
|
||||
|
||||
\index{mp\_digit} \index{mp\_word}
|
||||
Within the source code that will be presented for each algorithm, the data type \textbf{mp\_digit} will represent
|
||||
@ -376,7 +375,7 @@ the $/$ division symbol is used the intention is to perform an integer division
|
||||
$5/2 = 2$ which will often be written as $\lfloor 5/2 \rfloor = 2$ for clarity. When an expression is written as a
|
||||
fraction a real value division is implied, for example ${5 \over 2} = 2.5$.
|
||||
|
||||
The norm of a multiple precision integer, for example, $\vert \vert x \vert \vert$ will be used to represent the number of digits in the representation
|
||||
The norm of a multiple precision integer, for example $\vert \vert x \vert \vert$, will be used to represent the number of digits in the representation
|
||||
of the integer. For example, $\vert \vert 123 \vert \vert = 3$ and $\vert \vert 79452 \vert \vert = 5$.
|
||||
|
||||
\subsection{Work Effort}
|
||||
@ -569,7 +568,7 @@ By building outwards from a base foundation instead of using a parallel design m
|
||||
highly modular. Being highly modular is a desirable property of any project as it often means the resulting product
|
||||
has a small footprint and updates are easy to perform.
|
||||
|
||||
Usually when I start a project I will begin with the header file. I define the data types I think I will need and
|
||||
Usually when I start a project I will begin with the header files. I define the data types I think I will need and
|
||||
prototype the initial functions that are not dependent on other functions (within the library). After I
|
||||
implement these base functions I prototype more dependent functions and implement them. The process repeats until
|
||||
I implement all of the functions I require. For example, in the case of LibTomMath I implemented functions such as
|
||||
@ -619,14 +618,26 @@ any such data type but it does provide for making composite data types known as
|
||||
used within LibTomMath.
|
||||
|
||||
\index{mp\_int}
|
||||
\begin{verbatim}
|
||||
typedef struct {
|
||||
int used, alloc, sign;
|
||||
mp_digit *dp;
|
||||
} mp_int;
|
||||
\end{verbatim}
|
||||
\begin{figure}[here]
|
||||
\begin{center}
|
||||
\begin{small}
|
||||
%\begin{verbatim}
|
||||
\begin{tabular}{|l|}
|
||||
\hline
|
||||
typedef struct \{ \\
|
||||
\hspace{3mm}int used, alloc, sign;\\
|
||||
\hspace{3mm}mp\_digit *dp;\\
|
||||
\} \textbf{mp\_int}; \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
%\end{verbatim}
|
||||
\end{small}
|
||||
\caption{The mp\_int Structure}
|
||||
\label{fig:mpint}
|
||||
\end{center}
|
||||
\end{figure}
|
||||
|
||||
The mp\_int structure can be broken down as follows.
|
||||
The mp\_int structure (fig. \ref{fig:mpint}) can be broken down as follows.
|
||||
|
||||
\begin{enumerate}
|
||||
\item The \textbf{used} parameter denotes how many digits of the array \textbf{dp} contain the digits used to represent
|
||||
@ -701,9 +712,10 @@ fault by dereferencing memory not owned by the application.
|
||||
In the case of LibTomMath the only errors that are checked for are related to inappropriate inputs (division by zero for
|
||||
instance) and memory allocation errors. It will not check that the mp\_int passed to any function is valid nor
|
||||
will it check pointers for validity. Any function that can cause a runtime error will return an error code as an
|
||||
\textbf{int} data type with one of the following values.
|
||||
\textbf{int} data type with one of the following values (fig. \ref{fig:errcodes}).
|
||||
|
||||
\index{MP\_OKAY} \index{MP\_VAL} \index{MP\_MEM}
|
||||
\begin{figure}[here]
|
||||
\begin{center}
|
||||
\begin{tabular}{|l|l|}
|
||||
\hline \textbf{Value} & \textbf{Meaning} \\
|
||||
@ -713,6 +725,9 @@ will it check pointers for validity. Any function that can cause a runtime erro
|
||||
\hline
|
||||
\end{tabular}
|
||||
\end{center}
|
||||
\caption{LibTomMath Error Codes}
|
||||
\label{fig:errcodes}
|
||||
\end{figure}
|
||||
|
||||
When an error is detected within a function it should free any memory it allocated, often during the initialization of
|
||||
temporary mp\_ints, and return as soon as possible. The goal is to leave the system in the same state it was when the
|
||||
@ -748,6 +763,7 @@ to zero. The \textbf{used} count set to zero and \textbf{sign} set to \textbf{M
|
||||
An mp\_int is said to be initialized if it is set to a valid, preferably default, state such that all of the members of the
|
||||
structure are set to valid values. The mp\_init algorithm will perform such an action.
|
||||
|
||||
\index{mp\_init}
|
||||
\begin{figure}[here]
|
||||
\begin{center}
|
||||
\begin{tabular}{l}
|
||||
@ -770,17 +786,23 @@ structure are set to valid values. The mp\_init algorithm will perform such an
|
||||
\end{figure}
|
||||
|
||||
\textbf{Algorithm mp\_init.}
|
||||
The \textbf{MP\_PREC} name represents a constant\footnote{Defined in the ``tommath.h'' header file within LibTomMath.}
|
||||
used to dictate the minimum precision of allocated mp\_int integers. Ideally, it is at least equal to $32$ since for most
|
||||
purposes that will be more than enough.
|
||||
The purpose of this function is to initialize an mp\_int structure so that the rest of the library can properly
|
||||
manipulate it. It is assumed that the input may not have had any of its members previously initialized which is certainly
|
||||
a valid assumption if the input resides on the stack.
|
||||
|
||||
Memory for the default number of digits is allocated first. If the allocation fails the algorithm returns immediately
|
||||
with the \textbf{MP\_MEM} error code. If the allocation succeeds the remaining members of the mp\_int structure
|
||||
must be initialized to reflect the default initial state.
|
||||
Before any of the members such as \textbf{sign}, \textbf{used} or \textbf{alloc} are initialized the memory for
|
||||
the digits is allocated. If this fails the function returns before setting any of the other members. The \textbf{MP\_PREC}
|
||||
name represents a constant\footnote{Defined in the ``tommath.h'' header file within LibTomMath.}
|
||||
used to dictate the minimum precision of newly initialized mp\_int integers. Ideally, it is at least equal to the smallest
|
||||
precision number you'll be working with.
|
||||
|
||||
The allocated digits are all set to zero (step three) to ensure they are in a known state. The \textbf{sign}, \textbf{used}
|
||||
and \textbf{alloc} are subsequently initialized to represent the zero integer. By step seven the algorithm returns a success
|
||||
code and the mp\_int $a$ has been successfully initialized to a valid state representing the integer zero.
|
||||
Allocating a block of digits at first instead of a single digit has the benefit of lowering the number of usually slow
|
||||
heap operations later functions will have to perform in the future. If \textbf{MP\_PREC} is set correctly the slack
|
||||
memory and the number of heap operations will be trivial.
|
||||
|
||||
Once the allocation has been made the digits have to be set to zero as well as the \textbf{used}, \textbf{sign} and
|
||||
\textbf{alloc} members initialized. This ensures that the mp\_int will always represent the default state of zero regardless
|
||||
of the original condition of the input.
|
||||
|
||||
\textbf{Remark.}
|
||||
This function introduces the idiosyncrasy that all iterative loops, commonly initiated with the ``for'' keyword, iterate incrementally
|
||||
@ -796,19 +818,21 @@ One immediate observation of this initializtion function is that it does not ret
|
||||
is assumed that the caller has already allocated memory for the mp\_int structure, typically on the application stack. The
|
||||
call to mp\_init() is used only to initialize the members of the structure to a known default state.
|
||||
|
||||
Before any of the other members of the structure are initialized memory from the application heap is allocated with
|
||||
the calloc() function (line @22,calloc@). The size of the allocated memory is large enough to hold \textbf{MP\_PREC}
|
||||
mp\_digit variables. The calloc() function is used instead\footnote{calloc() will allocate memory in the same
|
||||
manner as malloc() except that it also sets the contents to zero upon successfully allocating the memory.} of malloc()
|
||||
since digits have to be set to zero for the function to finish correctly. The \textbf{OPT\_CAST} token is a macro
|
||||
definition which will turn into a cast from void * to mp\_digit * for C++ compilers. It is not required for C compilers.
|
||||
Here we see (line @23,XMALLOC@) the memory allocation is performed first. This allows us to exit cleanly and quickly
|
||||
if there is an error. If the allocation fails the routine will return \textbf{MP\_MEM} to the caller to indicate there
|
||||
was a memory error. The function XMALLOC is what actually allocates the memory. Technically XMALLOC is not a function
|
||||
but a macro defined in ``tommath.h''. By default, XMALLOC will evaluate to malloc() which is the C library's built--in
|
||||
memory allocation routine.
|
||||
|
||||
After the memory has been successfully allocated the remainder of the members are initialized
|
||||
In order to assure the mp\_int is in a known state the digits must be set to zero. On most platforms this could have been
|
||||
accomplished by using calloc() instead of malloc(). However, to correctly initialize an integer type to a given value in a
|
||||
portable fashion you have to actually assign the value. The for loop (line @28,for@) performs this required
|
||||
operation.
|
||||
|
||||
After the memory has been successfully initialized the remainder of the members are initialized
|
||||
(lines @29,used@ through @31,sign@) to their respective default states. At this point the algorithm has succeeded and
|
||||
a success code is returned to the calling function.
|
||||
|
||||
If this function returns \textbf{MP\_OKAY} it is safe to assume the mp\_int structure has been properly initialized and
|
||||
is safe to use with other functions within the library.
|
||||
a success code is returned to the calling function. If this function returns \textbf{MP\_OKAY} it is safe to assume the
|
||||
mp\_int structure has been properly initialized and is safe to use with other functions within the library.
|
||||
|
||||
\subsection{Clearing an mp\_int}
|
||||
When an mp\_int is no longer required by the application, the memory that has been allocated for its digits must be
|
||||
@ -819,7 +843,7 @@ returned to the application's memory pool with the mp\_clear algorithm.
|
||||
\begin{tabular}{l}
|
||||
\hline Algorithm \textbf{mp\_clear}. \\
|
||||
\textbf{Input}. An mp\_int $a$ \\
|
||||
\textbf{Output}. The memory for $a$ is freed for reuse. \\
|
||||
\textbf{Output}. The memory for $a$ shall be deallocated. \\
|
||||
\hline \\
|
||||
1. If $a$ has been previously freed then return(\textit{MP\_OKAY}). \\
|
||||
2. for $n$ from 0 to $a.used - 1$ do \\
|
||||
@ -836,32 +860,31 @@ returned to the application's memory pool with the mp\_clear algorithm.
|
||||
\end{figure}
|
||||
|
||||
\textbf{Algorithm mp\_clear.}
|
||||
This algorithm releases the memory allocated for an mp\_int back into the memory pool for reuse. It is designed
|
||||
such that a given mp\_int structure can be cleared multiple times between initializations without attempting to
|
||||
free the memory twice\footnote{In ISO C for example, calling free() twice on the same memory block causes undefined
|
||||
behaviour.}.
|
||||
This algorithm accomplishes two goals. First, it clears the digits and the other mp\_int members. This ensures that
|
||||
if a developer accidentally re-uses a cleared structure it is less likely to cause problems. The second goal
|
||||
is to free the allocated memory.
|
||||
|
||||
The first step determines if the mp\_int structure has been marked as free already. If it has, the algorithm returns
|
||||
success immediately as no further actions are required. Otherwise, the algorithm will proceed to put the structure
|
||||
in a known empty and otherwise invalid state. First the digits of the mp\_int are set to zero. The memory that has been allocated for the
|
||||
digits is then freed. The \textbf{used} and \textbf{alloc} counts are both set to zero and the \textbf{sign} set to
|
||||
\textbf{MP\_ZPOS}. This known fixed state for cleared mp\_int structures will make debugging easier for the end
|
||||
developer. That is, if they spot (via their debugger) an mp\_int they are using that is in this state it will be
|
||||
obvious that they erroneously and prematurely cleared the mp\_int structure.
|
||||
The logic behind the algorithm is extended by marking cleared mp\_int structures so that subsequent calls to this
|
||||
algorithm will not try to free the memory multiple times. Cleared mp\_ints are detectable by having a pre-defined invalid
|
||||
digit pointer \textbf{dp} setting.
|
||||
|
||||
Note that once an mp\_int has been cleared the mp\_int structure is no longer in a valid state for any other algorithm
|
||||
Once an mp\_int has been cleared the mp\_int structure is no longer in a valid state for any other algorithm
|
||||
with the exception of algorithms mp\_init, mp\_init\_copy, mp\_init\_size and mp\_clear.
|
||||
|
||||
EXAM,bn_mp_clear.c
|
||||
|
||||
The ``if'' statement (line @21,a->dp != NULL@) prevents the heap from being corrupted if a user double-frees an
|
||||
mp\_int. This is because once the memory is freed the pointer is set to \textbf{NULL} (line @30,NULL@).
|
||||
The algorithm only operates on the mp\_int if it hasn't been previously cleared. The if statement (line @23,a->dp != NULL@)
|
||||
checks to see if the \textbf{dp} member is not \textbf{NULL}. If the mp\_int is a valid mp\_int then \textbf{dp} cannot be
|
||||
\textbf{NULL} in which case the if statement will evaluate to true.
|
||||
|
||||
Without the check, code that accidentally calls mp\_clear twice for a given mp\_int structure would try to free the memory
|
||||
allocated for the digits twice. This may cause some C libraries to signal a fault. By setting the pointer to
|
||||
\textbf{NULL} it helps debug code that may inadvertently free the mp\_int before it is truly not needed, because attempts
|
||||
to reference digits should fail immediately. The allocated digits are set to zero before being freed (line @24,memset@).
|
||||
This is ideal for cryptographic situations where the integer that the mp\_int represents might need to be kept a secret.
|
||||
The digits of the mp\_int are cleared by the for loop (line @25,for@) which assigns a zero to every digit. Similar to mp\_init()
|
||||
the digits are assigned zero instead of using block memory operations (such as memset()) since this is more portable.
|
||||
|
||||
The digits are deallocated off the heap via the XFREE macro. Similar to XMALLOC the XFREE macro actually evaluates to
|
||||
a standard C library function, in this case the free() function. Since free() only deallocates the memory the pointer
|
||||
still has to be reset to \textbf{NULL} manually (line @33,NULL@).
|
||||
|
||||
Now that the digits have been cleared and deallocated the other members are set to their final values (lines @34,= 0@ and @35,ZPOS@).
|
||||
|
||||
\section{Maintenance Algorithms}
|
||||
|
||||
@ -889,7 +912,7 @@ must be re-sized appropriately to accomodate the result. The mp\_grow algorithm
|
||||
1. if $a.alloc \ge b$ then return(\textit{MP\_OKAY}) \\
|
||||
2. $u \leftarrow b\mbox{ (mod }MP\_PREC\mbox{)}$ \\
|
||||
3. $v \leftarrow b + 2 \cdot MP\_PREC - u$ \\
|
||||
4. Re-Allocate the array of digits $a$ to size $v$ \\
|
||||
4. Re-allocate the array of digits $a$ to size $v$ \\
|
||||
5. If the allocation failed then return(\textit{MP\_MEM}). \\
|
||||
6. for n from a.alloc to $v - 1$ do \\
|
||||
\hspace{+3mm}6.1 $a_n \leftarrow 0$ \\
|
||||
@ -914,15 +937,19 @@ assumed to contain undefined values they are initially set to zero.
|
||||
|
||||
EXAM,bn_mp_grow.c
|
||||
|
||||
The first step is to see if we actually need to perform a re-allocation at all (line @24,a->alloc < size@). If a reallocation
|
||||
must occur the digit count is padded upwards to help prevent many trivial reallocations (line @28,size@). Next the reallocation is performed
|
||||
and the return of realloc() is stored in a temporary pointer named $tmp$ (line @36,realloc@). The return is stored in a temporary
|
||||
instead of $a.dp$ to prevent the code from losing the original pointer in case the reallocation fails. Had the return been stored
|
||||
in $a.dp$ instead there would be no way to reclaim the heap originally used.
|
||||
A quick optimization is to first determine if a memory re-allocation is required at all. The if statement (line @23,if@) checks
|
||||
if the \textbf{alloc} member of the mp\_int is smaller than the requested digit count. If the count is not larger than \textbf{alloc}
|
||||
the function skips the re-allocation part thus saving time.
|
||||
|
||||
If the reallocation fails the function will return \textbf{MP\_MEM} (line @39,return@), otherwise, the value of $tmp$ is assigned
|
||||
to the pointer $a.dp$ and the function continues. A simple for loop from line @48,a->alloc@ to line @50,}@ will zero all digits
|
||||
that were above the old \textbf{alloc} limit to make sure the integer is in a known state.
|
||||
When a re-allocation is performed it is turned into an optimal request to save time in the future. The requested digit count is
|
||||
padded upwards to the 2nd multiple of \textbf{MP\_PREC} larger than \textbf{alloc} (line @25, size@). The XREALLOC function is used
|
||||
to re-allocate the memory. As per the other functions XREALLOC is actually a macro which evaluates to realloc by default. The realloc
|
||||
function leaves the base of the allocation intact which means the first \textbf{alloc} digits of the mp\_int are the same as before
|
||||
the re-allocation. All that is left is to clear the newly allocated digits and return.
|
||||
|
||||
Note that the re-allocation result is actually stored in a temporary pointer $tmp$. This is to allow this function to return
|
||||
an error with a valid pointer. Earlier releases of the library stored the result of XREALLOC into the mp\_int $a$. That would
|
||||
result in a memory leak if XREALLOC ever failed.
|
||||
|
||||
\subsection{Initializing Variable Precision mp\_ints}
|
||||
Occasionally the number of digits required will be known in advance of an initialization, based on, for example, the size
|
||||
@ -970,7 +997,7 @@ The number of digits $b$ requested is padded (line @22,MP_PREC@) by first augmen
|
||||
mp\_int is placed in a default state representing the integer zero. Otherwise, the error code \textbf{MP\_MEM} will be
|
||||
returned (line @27,return@).
|
||||
|
||||
The digits are allocated and set to zero at the same time with the calloc() function (line @25,calloc@). The
|
||||
The digits are allocated and set to zero at the same time with the calloc() function (line @25,XCALLOC@). The
|
||||
\textbf{used} count is set to zero, the \textbf{alloc} count set to the padded digit count and the \textbf{sign} flag set
|
||||
to \textbf{MP\_ZPOS} to achieve a default valid mp\_int state (lines @29,used@, @30,alloc@ and @31,sign@). If the function
|
||||
returns successfully then it is correct to assume that the mp\_int structure is in a valid state for the remainder of the
|
||||
|