added libtommath-0.06

This commit is contained in:
Tom St Denis 2003-02-28 16:05:26 +00:00 committed by Steffen Jaeckel
parent b01dd94bf2
commit 16c6ccc62c
8 changed files with 102 additions and 71 deletions

2
b.bat
View File

@ -1,3 +1,3 @@
nasm -f coff timer.asm nasm -f coff timer.asm
gcc -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c bn.c timer.o -o demo gcc -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c bn.c timer.o -o ltmdemo
gcc -I./mtest/ -DU_MPI -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c mtest/mpi.c timer.o -o mpidemo gcc -I./mtest/ -DU_MPI -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c mtest/mpi.c timer.o -o mpidemo

80
bn.c
View File

@ -700,8 +700,8 @@ int mp_mul_2(mp_int *a, mp_int *b)
/* low level addition */ /* low level addition */
static int s_mp_add(mp_int *a, mp_int *b, mp_int *c) static int s_mp_add(mp_int *a, mp_int *b, mp_int *c)
{ {
mp_int t, *x; mp_int *x;
int res, min, max, i; int olduse, res, min, max, i;
mp_digit u; mp_digit u;
REGFUNC("s_mp_add"); REGFUNC("s_mp_add");
@ -724,45 +724,52 @@ static int s_mp_add(mp_int *a, mp_int *b, mp_int *c)
} }
/* init result */ /* init result */
if ((res = mp_init_size(&t, max+1)) != MP_OKAY) { if (c->alloc < max+1) {
DECFUNC(); if ((res = mp_grow(c, max+1)) != MP_OKAY) {
return res; DECFUNC();
return res;
}
} }
t.used = max+1;
olduse = c->used;
c->used = max + 1;
/* add digits from lower part */ /* add digits from lower part */
u = 0; u = 0;
for (i = 0; i < min; i++) { for (i = 0; i < min; i++) {
/* T[i] = A[i] + B[i] + U */ /* T[i] = A[i] + B[i] + U */
t.dp[i] = a->dp[i] + b->dp[i] + u; c->dp[i] = a->dp[i] + b->dp[i] + u;
/* U = carry bit of T[i] */ /* U = carry bit of T[i] */
u = (t.dp[i] >> DIGIT_BIT) & 1; u = (c->dp[i] >> DIGIT_BIT) & 1;
/* take away carry bit from T[i] */ /* take away carry bit from T[i] */
t.dp[i] &= MP_MASK; c->dp[i] &= MP_MASK;
} }
/* now copy higher words if any, that is in A+B if A or B has more digits add those in */ /* now copy higher words if any, that is in A+B if A or B has more digits add those in */
if (min != max) { if (min != max) {
for (; i < max; i++) { for (; i < max; i++) {
/* T[i] = X[i] + U */ /* T[i] = X[i] + U */
t.dp[i] = x->dp[i] + u; c->dp[i] = x->dp[i] + u;
/* U = carry bit of T[i] */ /* U = carry bit of T[i] */
u = (t.dp[i] >> DIGIT_BIT) & 1; u = (c->dp[i] >> DIGIT_BIT) & 1;
/* take away carry bit from T[i] */ /* take away carry bit from T[i] */
t.dp[i] &= MP_MASK; c->dp[i] &= MP_MASK;
} }
} }
/* add carry */ /* add carry */
t.dp[i] = u; c->dp[i] = u;
mp_clamp(&t); /* clear digits above used (since we may not have grown result above) */
mp_exch(&t, c); for (i = c->used; i < olduse; i++) {
mp_clear(&t); c->dp[i] = 0;
}
mp_clamp(c);
DECFUNC(); DECFUNC();
return MP_OKAY; return MP_OKAY;
} }
@ -770,8 +777,7 @@ static int s_mp_add(mp_int *a, mp_int *b, mp_int *c)
/* low level subtraction (assumes a > b) */ /* low level subtraction (assumes a > b) */
static int s_mp_sub(mp_int *a, mp_int *b, mp_int *c) static int s_mp_sub(mp_int *a, mp_int *b, mp_int *c)
{ {
mp_int t; int olduse, res, min, max, i;
int res, min, max, i;
mp_digit u; mp_digit u;
REGFUNC("s_mp_sub"); REGFUNC("s_mp_sub");
@ -784,42 +790,48 @@ static int s_mp_sub(mp_int *a, mp_int *b, mp_int *c)
max = a->used; max = a->used;
/* init result */ /* init result */
if ((res = mp_init_size(&t, max)) != MP_OKAY) { if (c->alloc < max) {
DECFUNC(); if ((res = mp_grow(c, max)) != MP_OKAY) {
return res; DECFUNC();
return res;
}
} }
t.used = max; olduse = c->used;
c->used = max;
/* sub digits from lower part */ /* sub digits from lower part */
u = 0; u = 0;
for (i = 0; i < min; i++) { for (i = 0; i < min; i++) {
/* T[i] = A[i] - B[i] - U */ /* T[i] = A[i] - B[i] - U */
t.dp[i] = a->dp[i] - (b->dp[i] + u); c->dp[i] = a->dp[i] - (b->dp[i] + u);
/* U = carry bit of T[i] */ /* U = carry bit of T[i] */
u = (t.dp[i] >> DIGIT_BIT) & 1; u = (c->dp[i] >> DIGIT_BIT) & 1;
/* Clear carry from T[i] */ /* Clear carry from T[i] */
t.dp[i] &= MP_MASK; c->dp[i] &= MP_MASK;
} }
/* now copy higher words if any, e.g. if A has more digits than B */ /* now copy higher words if any, e.g. if A has more digits than B */
if (min != max) { if (min != max) {
for (; i < max; i++) { for (; i < max; i++) {
/* T[i] = A[i] - U */ /* T[i] = A[i] - U */
t.dp[i] = a->dp[i] - u; c->dp[i] = a->dp[i] - u;
/* U = carry bit of T[i] */ /* U = carry bit of T[i] */
u = (t.dp[i] >> DIGIT_BIT) & 1; u = (c->dp[i] >> DIGIT_BIT) & 1;
/* Clear carry from T[i] */ /* Clear carry from T[i] */
t.dp[i] &= MP_MASK; c->dp[i] &= MP_MASK;
} }
} }
mp_clamp(&t); /* clear digits above used (since we may not have grown result above) */
mp_exch(&t, c); for (i = c->used; i < olduse; i++) {
mp_clear(&t); c->dp[i] = 0;
}
mp_clamp(c);
DECFUNC(); DECFUNC();
return MP_OKAY; return MP_OKAY;
} }
@ -2941,7 +2953,7 @@ int mp_toradix(mp_int *a, char *str, int radix)
int res, digs; int res, digs;
mp_int t; mp_int t;
mp_digit d; mp_digit d;
unsigned char *_s = str; char *_s = str;
if (radix < 2 || radix > 64) { if (radix < 2 || radix > 64) {
return MP_VAL; return MP_VAL;
@ -2966,7 +2978,7 @@ int mp_toradix(mp_int *a, char *str, int radix)
*str++ = s_rmap[d]; *str++ = s_rmap[d];
++digs; ++digs;
} }
reverse(_s, digs); reverse((unsigned char *)_s, digs);
*str++ = '\0'; *str++ = '\0';
mp_clear(&t); mp_clear(&t);
return MP_OKAY; return MP_OKAY;

18
bn.h
View File

@ -12,7 +12,6 @@
* *
* Tom St Denis, tomstdenis@iahu.ca, http://libtommath.iahu.ca * Tom St Denis, tomstdenis@iahu.ca, http://libtommath.iahu.ca
*/ */
#ifndef BN_H_ #ifndef BN_H_
#define BN_H_ #define BN_H_
@ -39,13 +38,18 @@
#else #else
#ifndef CRYPT #ifndef CRYPT
#ifdef _MSC_VER #ifdef _MSC_VER
typedef __int64 ulong64; typedef unsigned __int64 ulong64;
typedef signed __int64 long64;
#else #else
typedef unsigned long long ulong64; typedef unsigned long long ulong64;
typedef signed long long long64;
#endif #endif
#endif #endif
/* default case */
typedef unsigned long mp_digit; typedef unsigned long mp_digit;
typedef ulong64 mp_word; typedef ulong64 mp_word;
#define DIGIT_BIT 28U #define DIGIT_BIT 28U
#endif #endif
@ -72,8 +76,14 @@
typedef int mp_err; typedef int mp_err;
#define KARATSUBA_MUL_CUTOFF 80 /* Min. number of digits before Karatsuba multiplication is used. */ /* you'll have to tune these... */
#define KARATSUBA_SQR_CUTOFF 80 /* Min. number of digits before Karatsuba squaring is used. */ #ifdef FAST_FPU
#define KARATSUBA_MUL_CUTOFF 100 /* Min. number of digits before Karatsuba multiplication is used. */
#define KARATSUBA_SQR_CUTOFF 100 /* Min. number of digits before Karatsuba squaring is used. */
#else
#define KARATSUBA_MUL_CUTOFF 80 /* Min. number of digits before Karatsuba multiplication is used. */
#define KARATSUBA_SQR_CUTOFF 80 /* Min. number of digits before Karatsuba squaring is used. */
#endif
typedef struct { typedef struct {
int used, alloc, sign; int used, alloc, sign;

BIN
bn.pdf

Binary file not shown.

32
bn.tex
View File

@ -1,7 +1,7 @@
\documentclass{article} \documentclass{article}
\begin{document} \begin{document}
\title{LibTomMath v0.05 \\ A Free Multiple Precision Integer Library} \title{LibTomMath v0.06 \\ A Free Multiple Precision Integer Library}
\author{Tom St Denis \\ tomstdenis@iahu.ca} \author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle \maketitle
\newpage \newpage
@ -460,34 +460,34 @@ were observed.
\begin{tabular}{c|c|c|c} \begin{tabular}{c|c|c|c}
\hline \textbf{Operation} & \textbf{Size (bits)} & \textbf{Time with MPI (cycles)} & \textbf{Time with LibTomMath (cycles)} \\ \hline \textbf{Operation} & \textbf{Size (bits)} & \textbf{Time with MPI (cycles)} & \textbf{Time with LibTomMath (cycles)} \\
\hline \hline
Inversion & 128 & 264,083 & 159,194 \\ Inversion & 128 & 264,083 & 59,782 \\
Inversion & 256 & 549,370 & 355,914 \\ Inversion & 256 & 549,370 & 146,915 \\
Inversion & 512 & 1,675,975 & 842,538 \\ Inversion & 512 & 1,675,975 & 367,172 \\
Inversion & 1024 & 5,237,957 & 2,170,804 \\ Inversion & 1024 & 5,237,957 & 1,054,158 \\
Inversion & 2048 & 17,871,944 & 6,250,876 \\ Inversion & 2048 & 17,871,944 & 3,459,683 \\
Inversion & 4096 & 66,610,468 & 18,161,612 \\ Inversion & 4096 & 66,610,468 & 11,834,556 \\
\hline \hline
Multiply & 128 & 1,426 & 828 \\ Multiply & 128 & 1,426 & 828 \\
Multiply & 256 & 2,551 & 1,393 \\ Multiply & 256 & 2,551 & 1,393 \\
Multiply & 512 & 7,913 & 2,926 \\ Multiply & 512 & 7,913 & 2,926 \\
Multiply & 1024 & 28,496 & 8,620 \\ Multiply & 1024 & 28,496 & 8,620 \\
Multiply & 2048 & 109,897 & 28,967 \\ Multiply & 2048 & 109,897 & 28,967 \\
Multiply & 4096 & 469,970 & 106,855 \\ Multiply & 4096 & 469,970 & 105,387 \\
\hline \hline
Square & 128 & 1,319 & 869 \\ Square & 128 & 1,319 & 869 \\
Square & 256 & 1,776 & 1,362 \\ Square & 256 & 1,776 & 1,362 \\
Square & 512 & 5,399 & 2,571 \\ Square & 512 & 5,399 & 2,571 \\
Square & 1024 & 18,991 & 6,332 \\ Square & 1024 & 18,991 & 6,332 \\
Square & 2048 & 72,126 & 18,426 \\ Square & 2048 & 72,126 & 18,426 \\
Square & 4096 & 306,269 & 76,305 \\ Square & 4096 & 306,269 & 74,908 \\
\hline \hline
Exptmod & 512 & 32,021,586 & 5,709,468 \\ Exptmod & 512 & 32,021,586 & 5,696,459 \\
Exptmod & 768 & 97,595,492 & 12,473,526 \\ Exptmod & 768 & 97,595,492 & 12,428,274 \\
Exptmod & 1024 & 223,302,532 & 23,593,075 \\ Exptmod & 1024 & 223,302,532 & 22,834,316 \\
Exptmod & 2048 & 1,682,223,369 & 121,992,481 \\ Exptmod & 2048 & 1,682,223,369 & 119,888,049 \\
Exptmod & 2560 & 3,268,615,571 & 258,155,605 \\ Exptmod & 2560 & 3,268,615,571 & 250,901,263 \\
Exptmod & 3072 & 5,597,240,141 & 399,800,504 \\ Exptmod & 3072 & 5,597,240,141 & 391,716,431 \\
Exptmod & 4096 & 13,347,270,891 & 826,013,375 Exptmod & 4096 & 13,347,270,891 & 814,429,647
\end{tabular} \end{tabular}
\end{center} \end{center}

View File

@ -1,3 +1,7 @@
Dec 31st, 2002
v0.06 -- Sped up the s_mp_add, s_mp_sub which inturn sped up mp_invmod, mp_exptmod, etc...
-- Cleaned up the header a bit more
Dec 30th, 2002 Dec 30th, 2002
v0.05 -- Builds with MSVC out of the box v0.05 -- Builds with MSVC out of the box
-- Fixed a bug in mp_invmod w.r.t. even moduli -- Fixed a bug in mp_invmod w.r.t. even moduli

35
demo.c
View File

@ -1,5 +1,6 @@
#include <time.h> #include <time.h>
#ifdef U_MPI #ifdef U_MPI
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
@ -12,7 +13,6 @@
#else #else
typedef unsigned long long ulong64; typedef unsigned long long ulong64;
#endif #endif
#else #else
#include "bn.h" #include "bn.h"
#endif #endif
@ -22,6 +22,8 @@
extern ulong64 rdtsc(void); extern ulong64 rdtsc(void);
extern void reset(void); extern void reset(void);
#else #else
ulong64 _tt; ulong64 _tt;
void reset(void) { _tt = clock(); } void reset(void) { _tt = clock(); }
ulong64 rdtsc(void) { return clock() - _tt; } ulong64 rdtsc(void) { return clock() - _tt; }
@ -73,11 +75,11 @@ int mp_reduce_setup(mp_int *a, mp_int *b)
} }
#endif #endif
char cmd[4096], buf[4096];
int main(void) int main(void)
{ {
mp_int a, b, c, d, e, f; mp_int a, b, c, d, e, f;
unsigned long expt_n, add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, inv_n; unsigned long expt_n, add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, inv_n;
unsigned char cmd[4096], buf[4096];
int rr; int rr;
#ifdef TIMER #ifdef TIMER
@ -92,6 +94,7 @@ int main(void)
mp_init(&e); mp_init(&e);
mp_init(&f); mp_init(&f);
mp_read_radix(&a, "V//////////////////////////////////////////////////////////////////////////////////////", 64); mp_read_radix(&a, "V//////////////////////////////////////////////////////////////////////////////////////", 64);
mp_reduce_setup(&b, &a); mp_reduce_setup(&b, &a);
printf("\n\n----\n\n"); printf("\n\n----\n\n");
@ -102,12 +105,11 @@ int main(void)
mp_sub_d(&a, 1, &c); mp_sub_d(&a, 1, &c);
mp_exptmod(&b, &c, &a, &d); mp_exptmod(&b, &c, &a, &d);
mp_toradix(&d, buf, 10); mp_toradix(&d, buf, 10);
printf("b^p-1 == %s\n", buf); printf("b^p-1 == %s\n", buf);
#ifdef TIMER #ifdef TIMER
mp_read_radix(&a, "340282366920938463463374607431768211455", 10); mp_read_radix(&a, "340282366920938463463374607431768211455", 10);
mp_read_radix(&b, "340282366920938463463574607431768211455", 10); mp_read_radix(&b, "340282366920938463463574607431768211455", 10);
while (a.used * DIGIT_BIT < 8192) { while (a.used * DIGIT_BIT < 8192) {
reset(); reset();
@ -132,30 +134,31 @@ int main(void)
mp_sqr(&a, &a); mp_sqr(&a, &a);
mp_sqr(&b, &b); mp_sqr(&b, &b);
} }
mp_read_radix(&a, "340282366920938463463374607431768211455", 10); mp_read_radix(&a, "340282366920938463463374607431768211455", 10);
while (a.used * DIGIT_BIT < 8192) { while (a.used * DIGIT_BIT < 8192) {
reset(); reset();
for (rr = 0; rr < 10000; rr++) { for (rr = 0; rr < 100000; rr++) {
mp_sqr(&a, &b); mp_sqr(&a, &b);
} }
tt = rdtsc(); tt = rdtsc();
printf("Squaring %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((ulong64)10000)); printf("Squaring %d-bit took %lu cycles\n", mp_count_bits(&a), tt / ((ulong64)100000));
mp_copy(&b, &a); mp_copy(&b, &a);
} }
mp_read_radix(&a, "340282366920938463463374607431768211455", 10); mp_read_radix(&a, "340282366920938463463374607431768211455", 10);
while (a.used * DIGIT_BIT < 8192) { while (a.used * DIGIT_BIT < 8192) {
reset(); reset();
for (rr = 0; rr < 10000; rr++) { for (rr = 0; rr < 100000; rr++) {
mp_mul(&a, &a, &b); mp_mul(&a, &a, &b);
} }
tt = rdtsc(); tt = rdtsc();
printf("Multiplying %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((ulong64)10000)); printf("Multiplying %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((ulong64)100000));
mp_copy(&b, &a); mp_copy(&b, &a);
} }
{ {
char *primes[] = { char *primes[] = {
@ -213,6 +216,8 @@ int main(void)
mp_sqr(&a, &a); mp_sqr(&a, &a);
mp_sqr(&b, &b); mp_sqr(&b, &b);
} }
return 0;
#endif #endif
@ -264,9 +269,9 @@ draw(&a);draw(&b);draw(&c);draw(&d);
/* test the sign/unsigned storage functions */ /* test the sign/unsigned storage functions */
rr = mp_signed_bin_size(&c); rr = mp_signed_bin_size(&c);
mp_to_signed_bin(&c, cmd); mp_to_signed_bin(&c, (unsigned char *)cmd);
memset(cmd+rr, rand()&255, sizeof(cmd)-rr); memset(cmd+rr, rand()&255, sizeof(cmd)-rr);
mp_read_signed_bin(&d, cmd, rr); mp_read_signed_bin(&d, (unsigned char *)cmd, rr);
if (mp_cmp(&c, &d) != MP_EQ) { if (mp_cmp(&c, &d) != MP_EQ) {
printf("mp_signed_bin failure!\n"); printf("mp_signed_bin failure!\n");
draw(&c); draw(&c);
@ -276,9 +281,9 @@ draw(&a);draw(&b);draw(&c);draw(&d);
rr = mp_unsigned_bin_size(&c); rr = mp_unsigned_bin_size(&c);
mp_to_unsigned_bin(&c, cmd); mp_to_unsigned_bin(&c, (unsigned char *)cmd);
memset(cmd+rr, rand()&255, sizeof(cmd)-rr); memset(cmd+rr, rand()&255, sizeof(cmd)-rr);
mp_read_unsigned_bin(&d, cmd, rr); mp_read_unsigned_bin(&d, (unsigned char *)cmd, rr);
if (mp_cmp_mag(&c, &d) != MP_EQ) { if (mp_cmp_mag(&c, &d) != MP_EQ) {
printf("mp_unsigned_bin failure!\n"); printf("mp_unsigned_bin failure!\n");
draw(&c); draw(&c);

View File

@ -1,7 +1,7 @@
CC = gcc CC = gcc
CFLAGS += -DDEBUG -Wall -W -O3 -fomit-frame-pointer -funroll-loops CFLAGS += -DDEBUG -Wall -W -O3 -fomit-frame-pointer -funroll-loops
VERSION=0.05 VERSION=0.06
default: test default: test