added libtommath-0.07
This commit is contained in:
		
							parent
							
								
									16c6ccc62c
								
							
						
					
					
						commit
						3cd7000342
					
				
							
								
								
									
										2
									
								
								b.bat
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								b.bat
									
									
									
									
									
								
							| @ -1,3 +1,3 @@ | ||||
| nasm -f coff timer.asm | ||||
| gcc -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c bn.c timer.o -o ltmdemo | ||||
| gcc -I./mtest/ -DU_MPI -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c mtest/mpi.c timer.o -o mpidemo | ||||
| rem gcc -I./mtest/ -DU_MPI -Wall -W -O3 -fomit-frame-pointer -funroll-loops -DTIMER_X86 demo.c mtest/mpi.c timer.o -o mpidemo | ||||
|  | ||||
							
								
								
									
										237
									
								
								bn.c
									
									
									
									
									
								
							
							
						
						
									
										237
									
								
								bn.c
									
									
									
									
									
								
							| @ -849,8 +849,7 @@ static int s_mp_sub(mp_int *a, mp_int *b, mp_int *c) | ||||
|  */ | ||||
| static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs) | ||||
| { | ||||
|    mp_int t; | ||||
|    int res, pa, pb, ix, iy; | ||||
|    int olduse, res, pa, pb, ix, iy; | ||||
|    mp_word W[512], *_W; | ||||
|    mp_digit tmpx, *tmpy; | ||||
|     | ||||
| @ -859,11 +858,12 @@ static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs) | ||||
|    VERIFY(b); | ||||
|    VERIFY(c); | ||||
|     | ||||
|    if ((res = mp_init_size(&t, digs)) != MP_OKAY) { | ||||
|       DECFUNC(); | ||||
|       return res; | ||||
|    if (c->alloc < digs) { | ||||
|       if ((res = mp_grow(c, digs)) != MP_OKAY) { | ||||
|          DECFUNC(); | ||||
|          return res; | ||||
|       } | ||||
|    } | ||||
|    t.used = digs; | ||||
|     | ||||
|    /* clear temp buf (the columns) */ | ||||
|    memset(W, 0, digs*sizeof(mp_word)); | ||||
| @ -893,6 +893,11 @@ static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs) | ||||
|        } | ||||
|    } | ||||
|     | ||||
|    /* setup dest */ | ||||
|    olduse  = c->used; | ||||
|    c->used = digs; | ||||
| 
 | ||||
|     | ||||
|    /* At this point W[] contains the sums of each column.  To get the
 | ||||
|     * correct result we must take the extra bits from each column and | ||||
|     * carry them down | ||||
| @ -904,14 +909,17 @@ static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs) | ||||
|     * this is slower but on most cryptographic size numbers it is faster. | ||||
|     */ | ||||
|    for (ix = 1; ix < digs; ix++) { | ||||
|        W[ix]      = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT)); | ||||
|        t.dp[ix-1] = W[ix-1] & ((mp_word)MP_MASK); | ||||
|        W[ix]       = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT)); | ||||
|        c->dp[ix-1] = W[ix-1] & ((mp_word)MP_MASK); | ||||
|    } | ||||
|    c->dp[digs-1]   = W[digs-1] & ((mp_word)MP_MASK); | ||||
|     | ||||
|    /* clear unused */ | ||||
|    for (ix = c->used; ix < olduse; ix++) { | ||||
|       c->dp[ix] = 0; | ||||
|    } | ||||
|    t.dp[digs-1]   = W[digs-1] & ((mp_word)MP_MASK); | ||||
|    | ||||
|    mp_clamp(&t); | ||||
|    mp_exch(&t, c); | ||||
|    mp_clear(&t); | ||||
|    mp_clamp(c); | ||||
|    DECFUNC(); | ||||
|    return MP_OKAY; | ||||
| } | ||||
| @ -993,8 +1001,7 @@ static int s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs) | ||||
|  */ | ||||
| static int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs) | ||||
| { | ||||
|    mp_int t; | ||||
|    int res, pa, pb, ix, iy; | ||||
|    int oldused, newused, res, pa, pb, ix, iy; | ||||
|    mp_word W[512], *_W; | ||||
|    mp_digit tmpx, *tmpy; | ||||
|     | ||||
| @ -1003,11 +1010,13 @@ static int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs) | ||||
|    VERIFY(b); | ||||
|    VERIFY(c); | ||||
|     | ||||
|    if ((res = mp_init_size(&t, a->used + b->used + 1)) != MP_OKAY) { | ||||
|       DECFUNC(); | ||||
|       return res; | ||||
|    newused = a->used + b->used + 1; | ||||
|    if (c->alloc < newused) { | ||||
|       if ((res = mp_grow(c, newused)) != MP_OKAY) { | ||||
|          DECFUNC(); | ||||
|          return res; | ||||
|       } | ||||
|    } | ||||
|    t.used = a->used + b->used + 1; | ||||
|     | ||||
|    /* like the other comba method we compute the columns first */ | ||||
|    pa = a->used; | ||||
| @ -1025,17 +1034,21 @@ static int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs) | ||||
|        } | ||||
|    } | ||||
|     | ||||
|    /* setup dest */ | ||||
|    oldused = c->used; | ||||
|    c->used = newused; | ||||
|     | ||||
|    /* now convert the array W downto what we need */ | ||||
|    for (ix = digs+1; ix < (pa+pb+1); ix++) { | ||||
|        W[ix]      = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT)); | ||||
|        t.dp[ix-1] = W[ix-1] & ((mp_word)MP_MASK); | ||||
|        W[ix]       = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT)); | ||||
|        c->dp[ix-1] = W[ix-1] & ((mp_word)MP_MASK); | ||||
|    } | ||||
|    t.dp[(pa+pb+1)-1]   = W[(pa+pb+1)-1] & ((mp_word)MP_MASK); | ||||
|    c->dp[(pa+pb+1)-1] = W[(pa+pb+1)-1] & ((mp_word)MP_MASK); | ||||
|     | ||||
|     | ||||
|    mp_clamp(&t); | ||||
|    mp_exch(&t, c); | ||||
|    mp_clear(&t); | ||||
|    for (ix = c->used; ix < oldused; ix++) { | ||||
|       c->dp[ix] = 0; | ||||
|    } | ||||
|    mp_clamp(c); | ||||
|    DECFUNC(); | ||||
|    return MP_OKAY; | ||||
| } | ||||
| @ -1106,8 +1119,7 @@ static int s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs) | ||||
|  */ | ||||
| static int fast_s_mp_sqr(mp_int *a, mp_int *b) | ||||
| { | ||||
|    mp_int t; | ||||
|    int res, ix, iy, pa; | ||||
|    int olduse, newused, res, ix, iy, pa; | ||||
|    mp_word  W2[512], W[512], *_W; | ||||
|    mp_digit tmpx, *tmpy; | ||||
|     | ||||
| @ -1116,11 +1128,13 @@ static int fast_s_mp_sqr(mp_int *a, mp_int *b) | ||||
|    VERIFY(b); | ||||
| 
 | ||||
|    pa = a->used; | ||||
|    if ((res = mp_init_size(&t, pa + pa + 1)) != MP_OKAY) { | ||||
|       DECFUNC(); | ||||
|       return res; | ||||
|    } | ||||
|    t.used = pa + pa + 1; | ||||
|    newused = pa + pa + 1; | ||||
|    if (b->alloc < newused) { | ||||
|       if ((res = mp_grow(b, newused)) != MP_OKAY) { | ||||
|          DECFUNC(); | ||||
|          return res; | ||||
|       } | ||||
|    }    | ||||
|     | ||||
|    /* zero temp buffer (columns) */ | ||||
|    memset(W, 0, (pa+pa+1)*sizeof(mp_word)); | ||||
| @ -1144,19 +1158,29 @@ static int fast_s_mp_sqr(mp_int *a, mp_int *b) | ||||
|    /* double first value, since the inner products are half of what they should be */ | ||||
|    W[0] += W[0] + W2[0]; | ||||
|     | ||||
|    /* setup dest */ | ||||
|    olduse  = b->used; | ||||
|    b->used = newused; | ||||
|     | ||||
|    /* now compute digits */ | ||||
|    for (ix = 1; ix < (pa+pa+1); ix++) { | ||||
|        /* double/add next digit */ | ||||
|        W[ix] += W[ix] + W2[ix]; | ||||
|         | ||||
|        W[ix]      = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT)); | ||||
|        t.dp[ix-1] = W[ix-1] & ((mp_word)MP_MASK); | ||||
|        W[ix]       += W[ix] + W2[ix]; | ||||
| 
 | ||||
|        W[ix]       = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT)); | ||||
|        b->dp[ix-1] = W[ix-1] & ((mp_word)MP_MASK); | ||||
|    } | ||||
|    t.dp[(pa+pa+1)-1]   = W[(pa+pa+1)-1] & ((mp_word)MP_MASK); | ||||
|    b->dp[(pa+pa+1)-1]   = W[(pa+pa+1)-1] & ((mp_word)MP_MASK); | ||||
|     | ||||
|    mp_clamp(&t); | ||||
|    mp_exch(&t, b); | ||||
|    mp_clear(&t); | ||||
|    /* clear high */ | ||||
|    for (ix = b->used; ix < olduse; ix++) { | ||||
|        b->dp[ix] = 0; | ||||
|    } | ||||
|     | ||||
|    /* fix the sign (since we no longer make a fresh temp) */ | ||||
|    b->sign = MP_ZPOS; | ||||
|     | ||||
|    mp_clamp(b); | ||||
|    DECFUNC(); | ||||
|    return MP_OKAY; | ||||
| } | ||||
| @ -1173,13 +1197,13 @@ static int s_mp_sqr(mp_int *a, mp_int *b) | ||||
|    VERIFY(a); | ||||
|    VERIFY(b); | ||||
|     | ||||
|    /* can we use the fast multiplier? */ | ||||
|    /* can we use the fast multiplier? */   | ||||
|    if (((a->used * 2 + 1) < 512) && a->used < (1<<( (CHAR_BIT*sizeof(mp_word)) - (2*DIGIT_BIT) - 1))) { | ||||
|       res = fast_s_mp_sqr(a,b); | ||||
|       DECFUNC(); | ||||
|       return res; | ||||
|    }   | ||||
| 
 | ||||
|     | ||||
|    pa = a->used; | ||||
|    if ((res = mp_init_size(&t, pa + pa + 1)) != MP_OKAY) { | ||||
|       DECFUNC(); | ||||
| @ -1385,10 +1409,9 @@ static int mp_karatsuba_mul(mp_int *a, mp_int *b, mp_int *c) | ||||
|    if (mp_lshd(&x1y1, B*2) != MP_OKAY) goto X1Y1;                 /* x1y1 = x1y1 << 2*B */ | ||||
| 
 | ||||
|    if (mp_add(&x0y0, &t1, &t1) != MP_OKAY) goto X1Y1;             /* t1 = x0y0 + t1 */ | ||||
|    if (mp_add(&t1, &x1y1, &t1) != MP_OKAY) goto X1Y1;             /* t1 = x0y0 + t1 + x1y1 */ | ||||
|    if (mp_add(&t1, &x1y1, c) != MP_OKAY) goto X1Y1;               /* t1 = x0y0 + t1 + x1y1 */ | ||||
| 
 | ||||
|    err = MP_OKAY; | ||||
|    mp_exch(&t1, c); | ||||
| 
 | ||||
| X1Y1: mp_clear(&x1y1); | ||||
| X0Y0: mp_clear(&x0y0); | ||||
| @ -1426,7 +1449,7 @@ int mp_mul(mp_int *a, mp_int *b, mp_int *c) | ||||
| static int mp_karatsuba_sqr(mp_int *a, mp_int *b) | ||||
| { | ||||
|    mp_int x0, x1, t1, t2, x0x0, x1x1; | ||||
|    int B, err; | ||||
|    int B, err, x; | ||||
|     | ||||
|    REGFUNC("mp_karatsuba_sqr"); | ||||
|    VERIFY(a); | ||||
| @ -1441,8 +1464,8 @@ static int mp_karatsuba_sqr(mp_int *a, mp_int *b) | ||||
|    B = B/2; | ||||
| 
 | ||||
|    /* init copy all the temps */ | ||||
|    if (mp_init_copy(&x0, a) != MP_OKAY) goto ERR; | ||||
|    if (mp_init_copy(&x1, a) != MP_OKAY) goto X0; | ||||
|    if (mp_init_size(&x0, B) != MP_OKAY) goto ERR; | ||||
|    if (mp_init_size(&x1, a->used - B) != MP_OKAY) goto X0; | ||||
| 
 | ||||
|    /* init temps */ | ||||
|    if (mp_init(&t1) != MP_OKAY)         goto X1; | ||||
| @ -1451,16 +1474,27 @@ static int mp_karatsuba_sqr(mp_int *a, mp_int *b) | ||||
|    if (mp_init(&x1x1) != MP_OKAY)       goto X0X0; | ||||
| 
 | ||||
|    /* now shift the digits */ | ||||
|    mp_mod_2d(&x0, B*DIGIT_BIT, &x0); | ||||
|    mp_rshd(&x1, B); | ||||
|    for (x = 0; x < B; x++) { | ||||
|        x0.dp[x] = a->dp[x]; | ||||
|    } | ||||
| 
 | ||||
|    for (x = B; x < a->used; x++) { | ||||
|        x1.dp[x-B] = a->dp[x]; | ||||
|    } | ||||
|     | ||||
|    x0.used = B; | ||||
|    x1.used = a->used - B; | ||||
|     | ||||
|    mp_clamp(&x0); | ||||
|    mp_clamp(&x1); | ||||
|     | ||||
|    /* now calc the products x0*x0 and x1*x1 */ | ||||
|    if (s_mp_sqr(&x0, &x0x0) != MP_OKAY) goto X1X1;                /* x0x0 = x0*x0 */ | ||||
|    if (s_mp_sqr(&x1, &x1x1) != MP_OKAY) goto X1X1;                /* x1x1 = x1*x1 */ | ||||
|    if (mp_sqr(&x0, &x0x0) != MP_OKAY) goto X1X1;                /* x0x0 = x0*x0 */ | ||||
|    if (mp_sqr(&x1, &x1x1) != MP_OKAY) goto X1X1;                /* x1x1 = x1*x1 */ | ||||
| 
 | ||||
|    /* now calc x1-x0 and y1-y0 */ | ||||
|    if (mp_sub(&x1, &x0, &t1) != MP_OKAY) goto X1X1;               /* t1 = x1 - x0 */ | ||||
|    if (s_mp_sqr(&t1, &t1) != MP_OKAY) goto X1X1;                  /* t1 = (x1 - x0) * (y1 - y0) */ | ||||
|    if (mp_sqr(&t1, &t1) != MP_OKAY) goto X1X1;                  /* t1 = (x1 - x0) * (y1 - y0) */ | ||||
| 
 | ||||
|    /* add x0y0 */ | ||||
|    if (mp_add(&x0x0, &x1x1, &t2) != MP_OKAY) goto X1X1;           /* t2 = x0y0 + x1y1 */ | ||||
| @ -1471,10 +1505,9 @@ static int mp_karatsuba_sqr(mp_int *a, mp_int *b) | ||||
|    if (mp_lshd(&x1x1, B*2) != MP_OKAY) goto X1X1;                 /* x1y1 = x1y1 << 2*B */ | ||||
| 
 | ||||
|    if (mp_add(&x0x0, &t1, &t1) != MP_OKAY) goto X1X1;             /* t1 = x0y0 + t1 */ | ||||
|    if (mp_add(&t1, &x1x1, &t1) != MP_OKAY) goto X1X1;             /* t1 = x0y0 + t1 + x1y1 */ | ||||
|    if (mp_add(&t1, &x1x1, b) != MP_OKAY) goto X1X1;               /* t1 = x0y0 + t1 + x1y1 */ | ||||
| 
 | ||||
|    err = MP_OKAY; | ||||
|    mp_exch(&t1, b); | ||||
|     | ||||
| X1X1: mp_clear(&x1x1); | ||||
| X0X0: mp_clear(&x0x0); | ||||
| @ -2784,6 +2817,102 @@ __M  : | ||||
|    return err; | ||||
| } | ||||
| 
 | ||||
| /* find the n'th root of an integer | ||||
|  * | ||||
|  * Result found such that (c)^b <= a and (c+1)^b > a | ||||
|  * | ||||
|  * a - input value; its sign is temporarily forced positive while the | ||||
|  *     root is computed and is restored before returning, on success | ||||
|  *     and on every error path taken after the change | ||||
|  * b - which root to take (a single digit) | ||||
|  * c - receives the root, carrying the same sign as a | ||||
|  * | ||||
|  * Returns MP_VAL when a is negative and b is even, MP_OKAY on success, | ||||
|  * otherwise the error code of the first helper call that failed. | ||||
|  */ | ||||
| int mp_n_root(mp_int *a, mp_digit b, mp_int *c) | ||||
| { | ||||
|    mp_int t1, t2, t3; | ||||
|    int res, neg; | ||||
|  | ||||
|    /* input must be positive if b is even */ | ||||
|    if ((b&1) == 0 && a->sign == MP_NEG) { | ||||
|       return MP_VAL; | ||||
|    } | ||||
|  | ||||
|    if ((res = mp_init(&t1)) != MP_OKAY) { | ||||
|       return res; | ||||
|    } | ||||
|  | ||||
|    if ((res = mp_init(&t2)) != MP_OKAY) { | ||||
|       goto __T1; | ||||
|    } | ||||
|  | ||||
|    if ((res = mp_init(&t3)) != MP_OKAY) { | ||||
|       goto __T2; | ||||
|    } | ||||
|  | ||||
|    /* if a is negative fudge the sign but keep track; from here on every | ||||
|     * exit goes through __T3, which puts the original sign back */ | ||||
|    neg     = a->sign; | ||||
|    a->sign = MP_ZPOS; | ||||
|  | ||||
|    /* t2 = a */ | ||||
|    if ((res = mp_copy(a, &t2)) != MP_OKAY) { | ||||
|       goto __T3; | ||||
|    } | ||||
|  | ||||
|    /* iterate until two successive estimates agree */ | ||||
|    do { | ||||
|       /* t1 = t2 */ | ||||
|       if ((res = mp_copy(&t2, &t1)) != MP_OKAY) { | ||||
|          goto __T3; | ||||
|       } | ||||
|  | ||||
|       /* t2 = t1 - ((t1^b - a) / (b * t1^(b-1))) */ | ||||
|       if ((res = mp_expt_d(&t1, b-1, &t3)) != MP_OKAY) {            /* t3 = t1^(b-1) */ | ||||
|          goto __T3; | ||||
|       } | ||||
|  | ||||
|       /* numerator */ | ||||
|       if ((res = mp_mul(&t3, &t1, &t2)) != MP_OKAY) {               /* t2 = t1^b */ | ||||
|          goto __T3; | ||||
|       } | ||||
|  | ||||
|       if ((res = mp_sub(&t2, a, &t2)) != MP_OKAY) {                 /* t2 = t1^b - a */ | ||||
|          goto __T3; | ||||
|       } | ||||
|  | ||||
|       /* denominator */ | ||||
|       if ((res = mp_mul_d(&t3, b, &t3)) != MP_OKAY) {               /* t3 = t1^(b-1) * b  */ | ||||
|          goto __T3; | ||||
|       } | ||||
|  | ||||
|       if ((res = mp_div(&t2, &t3, &t3, NULL)) != MP_OKAY) {         /* t3 = (t1^b - a)/(b * t1^(b-1)) */ | ||||
|          goto __T3; | ||||
|       } | ||||
|  | ||||
|       if ((res = mp_sub(&t1, &t3, &t2)) != MP_OKAY) {               /* t2 = next estimate */ | ||||
|          goto __T3; | ||||
|       } | ||||
|    } while (mp_cmp(&t1, &t2) != MP_EQ); | ||||
|  | ||||
|    /* result can be at most off by one so check */ | ||||
|    if ((res = mp_expt_d(&t1, b, &t2)) != MP_OKAY) { | ||||
|       goto __T3; | ||||
|    } | ||||
|  | ||||
|    if (mp_cmp(&t2, a) == MP_GT) { | ||||
|       if ((res = mp_sub_d(&t1, 1, &t1)) != MP_OKAY) { | ||||
|          goto __T3; | ||||
|       } | ||||
|    } | ||||
|  | ||||
|    /* set the result */ | ||||
|    mp_exch(&t1, c); | ||||
|  | ||||
|    /* set the sign of the result */ | ||||
|    c->sign = neg; | ||||
|  | ||||
|    res = MP_OKAY; | ||||
|  | ||||
| __T3: | ||||
|    /* reset the sign of a; done here so the error exits above do not leave | ||||
|     * the caller's input with a flipped sign (harmless when c aliases a, | ||||
|     * since c->sign was just set to the same value) */ | ||||
|    a->sign = neg; | ||||
|    mp_clear(&t3); | ||||
| __T2:  mp_clear(&t2); | ||||
| __T1:  mp_clear(&t1); | ||||
|    return res; | ||||
| } | ||||
| 
 | ||||
| /* --> radix conversion <-- */ | ||||
| /* reverse an array, used for radix code */ | ||||
| static void reverse(unsigned char *s, int len) | ||||
|  | ||||
							
								
								
									
										9
									
								
								bn.h
									
									
									
									
									
								
							
							
						
						
									
										9
									
								
								bn.h
									
									
									
									
									
								
							| @ -233,6 +233,15 @@ int mp_gcd(mp_int *a, mp_int *b, mp_int *c); | ||||
| /* c = [a, b] or (a*b)/(a, b) */ | ||||
| int mp_lcm(mp_int *a, mp_int *b, mp_int *c); | ||||
| 
 | ||||
| /* finds one of the b'th roots of a, such that |c|^b <= |a| 
 | ||||
|  * | ||||
|  * returns error if a < 0 and b is even | ||||
|  */ | ||||
| int mp_n_root(mp_int *a, mp_digit b, mp_int *c); | ||||
| 
 | ||||
| /* shortcut for square root */ | ||||
| #define mp_sqrt(a, b) mp_n_root(a, 2, b) | ||||
| 
 | ||||
| /* used to setup the Barrett reduction for a given modulus b */ | ||||
| int mp_reduce_setup(mp_int *a, mp_int *b); | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										107
									
								
								bn.tex
									
									
									
									
									
								
							
							
						
						
									
										107
									
								
								bn.tex
									
									
									
									
									
								
							| @ -1,7 +1,7 @@ | ||||
| \documentclass{article} | ||||
| \begin{document} | ||||
| 
 | ||||
| \title{LibTomMath v0.06 \\ A Free Multiple Precision Integer Library} | ||||
| \title{LibTomMath v0.07 \\ A Free Multiple Precision Integer Library} | ||||
| \author{Tom St Denis \\ tomstdenis@iahu.ca} | ||||
| \maketitle | ||||
| \newpage | ||||
| @ -187,17 +187,6 @@ int mp_mul_2(mp_int *a, mp_int *b); | ||||
| int mp_mod_2d(mp_int *a, int b, mp_int *c); | ||||
| \end{verbatim} | ||||
| 
 | ||||
| Both the \textbf{mp\_rshd} and \textbf{mp\_lshd} functions provide shifting by whole digits.  For example,  | ||||
| mp\_rshd($x$, $n$) is the same as $x \leftarrow \lfloor x / \beta^n \rfloor$ while mp\_lshd($x$, $n$) is equivalent | ||||
| to $x \leftarrow x \cdot \beta^n$.  Both functions are extremely fast as they merely copy digits within the array.   | ||||
| 
 | ||||
| Similarly the \textbf{mp\_div\_2d} and \textbf{mp\_mul\_2d} functions provide shifting but allow any bit count to  | ||||
| be specified.  For example, mp\_div\_2d($x$, $n$, $y$) is the same as $y =\lfloor x / 2^n \rfloor$ while  | ||||
| mp\_mul\_2d($x$, $n$, $y$) is the same as $y = x \cdot 2^n$.  The \textbf{mp\_div\_2} and \textbf{mp\_mul\_2}  | ||||
| functions are legacy functions that merely shift right or left one bit respectively.  The \textbf{mp\_mod\_2d} function | ||||
| reduces an integer mod a power of two.  For example, mp\_mod\_2d($x$, $n$, $y$) is the same as  | ||||
| $y \equiv x \mbox{ (mod }2^n\mbox{)}$. | ||||
| 
 | ||||
| \subsection{Basic Arithmetic} | ||||
| 
 | ||||
| Next are the class of functions which provide basic arithmetic. | ||||
| @ -234,17 +223,7 @@ int mp_div(mp_int *a, mp_int *b, mp_int *c, mp_int *d); | ||||
| int mp_mod(mp_int *a, mp_int *b, mp_int *c); | ||||
| \end{verbatim} | ||||
| 
 | ||||
| The \textbf{mp\_cmp} will compare two integers.  It will return \textbf{MP\_LT} if the first parameter is less than | ||||
| the second, \textbf{MP\_GT} if it is greater or \textbf{MP\_EQ} if they are equal.  These constants are the same as from | ||||
| MPI. | ||||
| 
 | ||||
| The \textbf{mp\_add}, \textbf{mp\_sub}, \textbf{mp\_mul}, \textbf{mp\_div}, \textbf{mp\_sqr} and \textbf{mp\_mod} are all | ||||
| fairly straight forward to understand.  Note that in mp\_div either $c$ (the quotient) or $d$ (the remainder) can be  | ||||
| passed as NULL to ignore it.  For example, if you only want the quotient $z = \lfloor x/y \rfloor$ then a call such as  | ||||
| mp\_div(\&x, \&y, \&z, NULL) is acceptable. | ||||
| 
 | ||||
| There is a related class of ``single digit'' functions that are like the above except they use a digit as the second | ||||
| operand. | ||||
| \subsection{Single Digit Functions} | ||||
| 
 | ||||
| \begin{verbatim} | ||||
| /* compare against a single digit */ | ||||
| @ -296,14 +275,13 @@ int mp_gcd(mp_int *a, mp_int *b, mp_int *c); | ||||
| /* c = [a, b] or (a*b)/(a, b) */ | ||||
| int mp_lcm(mp_int *a, mp_int *b, mp_int *c); | ||||
| 
 | ||||
| /* find the b'th root of a  */ | ||||
| int mp_n_root(mp_int *a, mp_digit b, mp_int *c); | ||||
| 
 | ||||
| /* d = a^b (mod c) */ | ||||
| int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d); | ||||
| \end{verbatim} | ||||
| 
 | ||||
| These are all fairly simple to understand.  The \textbf{mp\_invmod} is a modular multiplicative inverse.  That is it | ||||
| stores in the third parameter an integer such that $ac \equiv 1 \mbox{ (mod }b\mbox{)}$ provided such integer exists.  If | ||||
| there is no such integer the function returns \textbf{MP\_VAL}. | ||||
| 
 | ||||
| \subsection{Radix Conversions} | ||||
| To read or store integers in other formats there are the following functions. | ||||
| 
 | ||||
| @ -432,7 +410,7 @@ when $b \le 0$, in theory the routine will still give a properly congruent answe | ||||
| 
 | ||||
| This function requires $O(4 \cdot N)$ memory and $O(3 \cdot N^2)$ time. | ||||
| 
 | ||||
| \subsection{Modular Arithmetic} | ||||
| \subsection{Number Theoretic Functions} | ||||
| 
 | ||||
| \subsubsection{mp\_addmod, mp\_submod, mp\_mulmod, mp\_sqrmod} | ||||
| These functions take the time of their host function plus the time it takes to perform a division.  For example,  | ||||
| @ -445,6 +423,41 @@ Also note that these functions use mp\_mod which means the result are guaranteed | ||||
| This function will find $c = 1/a \mbox{ (mod }b\mbox{)}$ for any value of $a$ such that $(a, b) = 1$ and $b > 0$.  When | ||||
| $b$ is odd a ``fast'' variant is used which finds the inverse twice as fast.   | ||||
| 
 | ||||
| \subsubsection{mp\_gcd(mp\_int *a, mp\_int *b, mp\_int *c)} | ||||
| Finds the greatest common divisor of both $a$ and $b$ and places the result in $c$.  Will work with either positive | ||||
| or negative inputs.   | ||||
| 
 | ||||
| Function requires no additional memory and approximately $O(N \cdot log(N))$ time. | ||||
| 
 | ||||
| \subsubsection{mp\_lcm(mp\_int *a, mp\_int *b, mp\_int *c)} | ||||
| Finds the least common multiple of both $a$ and $b$ and places the result in $c$.  Will work with either positive | ||||
| or negative inputs.  This is calculated by dividing the product of $a$ and $b$ by the greatest common divisor of  | ||||
| both.   | ||||
| 
 | ||||
| Function requires no additional memory and approximately $O(4 \cdot N^2)$ time. | ||||
| 
 | ||||
| \subsubsection{mp\_n\_root(mp\_int *a, mp\_digit b, mp\_int *c)} | ||||
| Finds the $b$'th root of $a$ and stores it in $c$.  The roots are found such that $\vert c \vert^b \le \vert a \vert$.   | ||||
| Uses the Newton approximation approach which means it converges in $O(log \beta^N)$ time to a final result.  Each iteration | ||||
| requires $b$ multiplications and one division for a total work of $O(6N^2 \cdot log \beta^N) = O(6N^3 \cdot log \beta)$. | ||||
| 
 | ||||
| If the input $a$ is negative and $b$ is even the function returns an error.  Otherwise the function will return a root | ||||
| that has a sign that agrees with the sign of $a$. | ||||
| 
 | ||||
| \subsubsection{mp\_exptmod(mp\_int *a, mp\_int *b, mp\_int *c, mp\_int *d)} | ||||
| Computes $d = a^b \mbox{ (mod }c\mbox{)}$ using a sliding window $k$-ary exponentiation algorithm.  For an $\alpha$-bit | ||||
| exponent it performs $\alpha$ squarings and at most $\lfloor \alpha/k \rfloor + k$ multiplications.  The value of $k$ is | ||||
| chosen to minimize the number of multiplications required for a given value of $\alpha$.  Barrett reductions are used | ||||
| to reduce the squared or multiplied temporary results modulo $c$.  A Barrett reduction requires one division that is | ||||
| performed only once and two half multipliers of $N$ digit numbers resulting in approximately $O((N^2)/2)$ work.   | ||||
| 
 | ||||
| Let $\gamma = \lfloor \alpha/k \rfloor + k$ represent the total multiplications.  The total work of an exponentiation is | ||||
| therefore, $O(3 \cdot N^2 + (\alpha + \gamma) \cdot ((N^2)/2) + \alpha \cdot ((N^2 + N)/2) + \gamma \cdot N^2)$ which  | ||||
| simplifies to $O(3 \cdot N^2 + \gamma N^2 + \alpha (N^2 + (N/2)))$.  The bulk of the time is spent in the Barrett  | ||||
| reductions and the squaring algorithms.  Since $\gamma < \alpha$ it makes sense to optimize first the Barrett and | ||||
| squaring routines.  Significant improvements in the future will most likely stem from optimizing these instead | ||||
| of optimizing the multipliers. | ||||
| 
 | ||||
| \section{Timing Analysis} | ||||
| \subsection{Observed Timings} | ||||
| A simple test program ``demo.c'' was developed which builds with either MPI or LibTomMath (without modification).  The | ||||
| @ -467,27 +480,27 @@ Inversion & 1024 & 5,237,957  & 1,054,158   \\ | ||||
| Inversion & 2048 & 17,871,944  & 3,459,683   \\ | ||||
| Inversion & 4096 & 66,610,468  & 11,834,556   \\ | ||||
| \hline | ||||
| Multiply & 128 & 1,426   & 828    \\ | ||||
| Multiply & 256 & 2,551   & 1,393    \\ | ||||
| Multiply & 512 & 7,913   & 2,926    \\ | ||||
| Multiply & 1024 & 28,496   & 8,620  \\ | ||||
| Multiply & 2048 & 109,897   & 28,967    \\ | ||||
| Multiply & 4096 & 469,970   & 105,387    \\ | ||||
| Multiply & 128 & 1,426   & 451     \\ | ||||
| Multiply & 256 & 2,551   & 958     \\ | ||||
| Multiply & 512 & 7,913   & 2,476     \\ | ||||
| Multiply & 1024 & 28,496   & 7,927   \\ | ||||
| Multiply & 2048 & 109,897   & 28,224     \\ | ||||
| Multiply & 4096 & 469,970   & 104,681     \\ | ||||
| \hline  | ||||
| Square & 128 & 1,319   & 869    \\ | ||||
| Square & 256 & 1,776   & 1,362    \\ | ||||
| Square & 512 & 5,399  & 2,571   \\ | ||||
| Square & 1024 & 18,991  & 6,332    \\ | ||||
| Square & 2048 & 72,126  & 18,426   \\ | ||||
| Square & 4096 & 306,269  & 74,908 \\ | ||||
| Square & 128 & 1,319   & 511     \\ | ||||
| Square & 256 & 1,776   & 947     \\ | ||||
| Square & 512 & 5,399  & 2,153    \\ | ||||
| Square & 1024 & 18,991  & 5,733     \\ | ||||
| Square & 2048 & 72,126  & 17,621    \\ | ||||
| Square & 4096 & 306,269  & 70,168  \\ | ||||
| \hline  | ||||
| Exptmod & 512 & 32,021,586  & 5,696,459  \\ | ||||
| Exptmod & 768 & 97,595,492  & 12,428,274   \\ | ||||
| Exptmod & 1024 & 223,302,532  & 22,834,316   \\ | ||||
| Exptmod & 2048 & 1,682,223,369   & 119,888,049    \\ | ||||
| Exptmod & 2560 & 3,268,615,571   & 250,901,263     \\ | ||||
| Exptmod & 3072 & 5,597,240,141   & 391,716,431    \\ | ||||
| Exptmod & 4096 & 13,347,270,891   & 814,429,647     | ||||
| Exptmod & 512 & 32,021,586  & 4,472,406   \\ | ||||
| Exptmod & 768 & 97,595,492  & 10,427,845    \\ | ||||
| Exptmod & 1024 & 223,302,532  & 20,561,722    \\ | ||||
| Exptmod & 2048 & 1,682,223,369   & 113,978,803     \\ | ||||
| Exptmod & 2560 & 3,268,615,571   & 236,650,133      \\ | ||||
| Exptmod & 3072 & 5,597,240,141   & 373,449,291     \\ | ||||
| Exptmod & 4096 & 13,347,270,891   & 787,568,457      | ||||
| 
 | ||||
| \end{tabular} | ||||
| \end{center} | ||||
|  | ||||
| @ -1,3 +1,8 @@ | ||||
| Jan 1st, 2003 | ||||
| v0.07  -- Removed a lot of heap operations from core functions to speed them up | ||||
|        -- Added a root finding function [and mp_sqrt macro like from MPI] | ||||
|        -- Added more to manual  | ||||
| 
 | ||||
| Dec 31st, 2002 | ||||
| v0.06  -- Sped up the s_mp_add, s_mp_sub which in turn sped up mp_invmod, mp_exptmod, etc... | ||||
|        -- Cleaned up the header a bit more | ||||
|  | ||||
							
								
								
									
										22
									
								
								demo.c
									
									
									
									
									
								
							
							
						
						
									
										22
									
								
								demo.c
									
									
									
									
									
								
							| @ -105,10 +105,9 @@ int main(void) | ||||
|    mp_sub_d(&a, 1, &c); | ||||
|    mp_exptmod(&b, &c, &a, &d); | ||||
|    mp_toradix(&d, buf, 10); | ||||
|    printf("b^p-1 == %s\n", buf);  | ||||
|    printf("b^p-1 == %s\n", buf);      | ||||
| 
 | ||||
| #ifdef TIMER    | ||||
| 
 | ||||
|       mp_read_radix(&a, "340282366920938463463374607431768211455", 10); | ||||
|       mp_read_radix(&b, "340282366920938463463574607431768211455", 10); | ||||
|       while (a.used * DIGIT_BIT < 8192) { | ||||
| @ -156,9 +155,6 @@ int main(void) | ||||
|       printf("Multiplying %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((ulong64)100000)); | ||||
|       mp_copy(&b, &a); | ||||
|    } | ||||
| 
 | ||||
|    | ||||
|   | ||||
|     | ||||
|    { | ||||
|       char *primes[] = { | ||||
| @ -177,6 +173,7 @@ int main(void) | ||||
|       for (rr = 0; rr < mp_count_bits(&a); rr++) { | ||||
|          mp_mul_2d(&b, 1, &b); | ||||
|          b.dp[0] |= lbit(); | ||||
|          b.used  += 1; | ||||
|       } | ||||
|       mp_sub_d(&a, 1, &c); | ||||
|       mp_mod(&b, &c, &b); | ||||
| @ -198,7 +195,7 @@ int main(void) | ||||
|       printf("Exponentiating %d-bit took %llu cycles\n", mp_count_bits(&a), tt / ((ulong64)35)); | ||||
|    } | ||||
|    } | ||||
|     | ||||
| 
 | ||||
|    mp_read_radix(&a, "340282366920938463463374607431768211455", 10); | ||||
|    mp_read_radix(&b, "234892374891378913789237289378973232333", 10); | ||||
|    while (a.used * DIGIT_BIT < 8192) { | ||||
| @ -223,6 +220,19 @@ int main(void) | ||||
| 
 | ||||
|    inv_n = expt_n = lcm_n = gcd_n = add_n = sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = 0;    | ||||
|    for (;;) { | ||||
|     | ||||
|        /* randomly clear and re-init one variable, this has the effect of trimming the alloc space */ | ||||
|        switch (abs(rand()) % 7) { | ||||
|            case 0:  mp_clear(&a); mp_init(&a); break; | ||||
|            case 1:  mp_clear(&b); mp_init(&b); break; | ||||
|            case 2:  mp_clear(&c); mp_init(&c); break; | ||||
|            case 3:  mp_clear(&d); mp_init(&d); break; | ||||
|            case 4:  mp_clear(&e); mp_init(&e); break; | ||||
|            case 5:  mp_clear(&f); mp_init(&f); break; | ||||
|            case 6:  break; /* don't clear any */ | ||||
|        } | ||||
|     | ||||
|     | ||||
|        printf("%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%7lu/%5d\r", add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, expt_n, inv_n, _ifuncs); | ||||
|        fgets(cmd, 4095, stdin); | ||||
|        cmd[strlen(cmd)-1] = 0; | ||||
|  | ||||
							
								
								
									
										2
									
								
								makefile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								makefile
									
									
									
									
									
								
							| @ -1,7 +1,7 @@ | ||||
| CC = gcc | ||||
| CFLAGS  += -DDEBUG -Wall -W -O3 -fomit-frame-pointer -funroll-loops  | ||||
| 
 | ||||
| VERSION=0.06 | ||||
| VERSION=0.07 | ||||
| 
 | ||||
| default: test | ||||
| 
 | ||||
|  | ||||
| @ -41,7 +41,7 @@ void rand_num(mp_int *a) | ||||
|    unsigned char buf[512]; | ||||
| 
 | ||||
| top: | ||||
|    size = 1 + ((fgetc(rng)*fgetc(rng)) % 32); | ||||
|    size = 1 + ((fgetc(rng)*fgetc(rng)) % 512); | ||||
|    buf[0] = (fgetc(rng)&1)?1:0; | ||||
|    fread(buf+1, 1, size, rng); | ||||
|    for (n = 0; n < size; n++) { | ||||
| @ -57,7 +57,7 @@ void rand_num2(mp_int *a) | ||||
|    unsigned char buf[512]; | ||||
| 
 | ||||
| top: | ||||
|    size = 1 + ((fgetc(rng)*fgetc(rng)) % 32); | ||||
|    size = 1 + ((fgetc(rng)*fgetc(rng)) % 512); | ||||
|    buf[0] = (fgetc(rng)&1)?1:0; | ||||
|    fread(buf+1, 1, size, rng); | ||||
|    for (n = 0; n < size; n++) { | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user