tommath/bn.c

3368 lines
76 KiB
C

/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is a library that provides for multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library is designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://libtommath.iahu.ca
*/
#include "bn.h"
/* chars used in radix conversions: 64 symbols, in order the decimal digits,
 * the upper case letters, the lower case letters, then '+' and '/' */
static const char *s_rmap =
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
/* File-local MIN/MAX; any earlier definition is dropped first.  These are
 * plain macros, so an argument may be evaluated twice -- do not pass
 * expressions with side effects. */
#undef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#undef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#ifdef DEBUG
/* timing data */
/* gettsc() supplies the timestamps stored by REGFUNC/DECFUNC.  With
 * TIMER_X86 defined it is only declared here and must be provided
 * elsewhere; otherwise it simply returns the value of clock(). */
#ifdef TIMER_X86
extern ulong64 gettsc(void);
#else
ulong64 gettsc(void) { return clock(); }
#endif
/* structure to hold timing data: one entry per REGFUNC invocation, holding
 * the function name, the start/end timestamps and the elapsed total that
 * dump_timings() computes */
struct {
char *func;
ulong64 start, end, tot;
} timings[1000000];
/* structure to hold consolidated timing data: one entry per distinct
 * function name, filled in by dump_timings() */
struct _functime {
char *func;
ulong64 tot;
} functime[1000];
/* stack of the names of the instrumented functions currently executing;
 * _verify() prints it as a call trace */
static char *_funcs[1000];
/* _ifuncs: current depth of _funcs[]; _itims: number of timings[] entries in use */
int _ifuncs, _itims;
/* REGFUNC(name): claim the next timings[] slot (its index is kept in the
 * local __IX), push name onto _funcs[] and record the start timestamp.
 * The expansion starts with a declaration, so it has to come after the
 * function's own local declarations.
 * NOTE(review): neither _itims nor _ifuncs is range checked against the
 * array sizes. */
#define REGFUNC(name) int __IX = _itims++; _funcs[_ifuncs++] = name; timings[__IX].func = name; timings[__IX].start = gettsc();
/* DECFUNC(): record the end timestamp in the slot claimed by REGFUNC and pop
 * the name stack; it needs to run once per REGFUNC to keep _ifuncs balanced */
#define DECFUNC() timings[__IX].end = gettsc(); --_ifuncs;
/* VERIFY(val): run _verify() on val, passing the text of the argument and
 * the current source line for the error report */
#define VERIFY(val) _verify(val, #val, __LINE__);
/* sort the consolidated timings */
int qsort_helper(const void *A, const void *B)
{
struct _functime *a, *b;
a = (struct _functime *)A;
b = (struct _functime *)B;
if (a->tot > b->tot) return -1;
if (a->tot < b->tot) return 1;
return 0;
}
/* reset debugging information: forget every recorded timing sample and
 * empty the function-name stack */
void reset_timings(void)
{
  _itims = 0;
  _ifuncs = 0;
}
/* Dump the timing data recorded in timings[] to stdout.
 *
 * Phase I   : tot = end - start for every sample.
 * Phase II  : from each sample subtract the Phase I totals of the later
 *             samples that started no later than this one ended (the calls
 *             nested inside it).
 * Phase III : merge samples with the same function name into functime[],
 *             sort by descending total and print every entry with its share
 *             of the overall total.
 * Entries with a zero total and the "_verify" entry are not printed.
 */
void dump_timings(void)
{
int x, y;
ulong64 total;
/* first, for every sample, find the total time */
printf("Phase I ... Finding totals (%d samples)...\n", _itims);
for (x = 0; x < _itims; x++) {
timings[x].tot = timings[x].end - timings[x].start;
}
/* now subtract the time for each function where nested functions occurred */
printf("Phase II ... Finding dependencies...\n");
for (x = 0; x < _itims-1; x++) {
for (y = x+1; y < _itims && timings[y].start <= timings[x].end; y++) {
timings[x].tot -= timings[y].tot;
/* the subtraction is unsigned: anything above 2^40 is treated as a
 * wrap-around below zero and reset to zero */
if (timings[x].tot > ((ulong64)1 << (ulong64)40)) {
timings[x].tot = 0;
}
}
}
/* now consolidate all the entries */
printf("Phase III... Consolidation...\n");
memset(&functime, 0, sizeof(functime));
total = 0;
for (x = 0; x < _itims; x++) {
total += timings[x].tot;
/* try to find this entry (names are compared by content, the list ends at
 * the first NULL func).
 * NOTE(review): the search is not bounded by the size of functime[]. */
for (y = 0; functime[y].func != NULL; y++) {
if (strcmp(timings[x].func, functime[y].func) == 0) {
break;
}
}
if (functime[y].func == NULL) {
/* new entry */
functime[y].func = timings[x].func;
functime[y].tot = timings[x].tot;
} else {
functime[y].tot += timings[x].tot;
}
}
/* count the consolidated entries */
for (x = 0; functime[x].func != NULL; x++);
/* sort and dump */
qsort(&functime, x, sizeof(functime[0]), &qsort_helper);
for (x = 0; functime[x].func != NULL; x++) {
if (functime[x].tot > 0 && strcmp(functime[x].func, "_verify") != 0) {
/* name, raw total, then the percentage of the overall total with three
 * decimals (integer part, then thousandths of a percent modulo 1000) */
printf("%30s: %20llu (%3llu.%03llu %%)\n", functime[x].func, functime[x].tot, (functime[x].tot * (ulong64)100) / total, ((functime[x].tot * (ulong64)100000) / total) % (ulong64)1000);
}
}
}
/* Check the structural invariants of an mp_int (DEBUG builds only).
 *
 *   a    - number to check
 *   name - text of the expression that was passed to VERIFY()
 *   line - source line of the VERIFY() invocation
 *
 * The checks are, in order: dp is not NULL, used <= alloc, and every digit
 * from index used up to alloc is zero.  On the first failure the error, the
 * variable name, the line and the stack of active instrumented functions
 * are printed and the process is terminated with a non-zero exit status so
 * that the failure is visible to whatever started the program.
 */
static void _verify(mp_int *a, char *name, int line)
{
  int n, y;
  static const char *err[] = { "Null DP", "alloc < used", "digits above used" };
  REGFUNC("_verify");
  /* y tracks which check is running and indexes err[] on failure */
  /* dp null ? */
  y = 0;
  if (a->dp == NULL) goto error;
  /* used should be <= alloc */
  ++y;
  if (a->alloc < a->used) goto error;
  /* digits above used should be zero */
  ++y;
  for (n = a->used; n < a->alloc; n++) {
    if (a->dp[n]) goto error;
  }
  /* ok */
  DECFUNC();
  return;
error:
  printf("Error (%s) with variable {%s} on line %d\n", err[y], name, line);
  /* innermost function first */
  for (n = _ifuncs - 1; n >= 0; n--) {
    if (_funcs[n] != NULL) {
      printf("> %s\n", _funcs[n]);
    }
  }
  printf("\n");
  /* a corrupted number is a failure: do not report success to the caller */
  exit(1);
}
#else /* don't use DEBUG stuff so these macros are blank */
/* Without DEBUG the three instrumentation macros expand to nothing, so a
 * use such as "VERIFY(a);" leaves just an empty statement behind. */
#define REGFUNC(name)
#define DECFUNC()
#define VERIFY(val)
#endif
/* Initialise a new bigint: allocate MP_PREC zero-filled digits and set the
 * value to a non-negative zero.
 * Returns MP_OKAY, or MP_MEM if the allocation fails (a->dp is then NULL). */
int mp_init(mp_int *a)
{
  REGFUNC("mp_init");
  /* zero-filled digit storage at the default precision */
  a->dp = calloc(sizeof(mp_digit), MP_PREC);
  if (a->dp != NULL) {
    /* no digits in use yet, MP_PREC available, positive sign */
    a->sign = MP_ZPOS;
    a->alloc = MP_PREC;
    a->used = 0;
    VERIFY(a);
    DECFUNC();
    return MP_OKAY;
  }
  DECFUNC();
  return MP_MEM;
}
/* Release the storage of a.  The digits in use are wiped before the buffer
 * is freed and the structure is left with dp == NULL and zero sizes.
 * Calling it on a number whose dp is already NULL does nothing. */
void mp_clear(mp_int *a)
{
  REGFUNC("mp_clear");
  if (a->dp == NULL) {
    /* nothing allocated, nothing to free */
    DECFUNC();
    return;
  }
  VERIFY(a);
  /* wipe the digits that carried the value, then hand the memory back */
  memset(a->dp, 0, sizeof(mp_digit) * a->used);
  free(a->dp);
  /* reset the members to make debugging easier */
  a->dp = NULL;
  a->used = 0;
  a->alloc = 0;
  DECFUNC();
}
/* Swap the contents of a and b.  Only the structures are exchanged (digit
 * pointer, sizes and sign); no digits are copied. */
void mp_exch(mp_int *a, mp_int *b)
{
  mp_int held;
  REGFUNC("mp_exch");
  VERIFY(a);
  VERIFY(b);
  held = *b;
  *b = *a;
  *a = held;
  DECFUNC();
}
/* Grow the digit buffer of a so that it holds at least size digits.
 *
 * When a->alloc is already >= size nothing happens.  Otherwise size is
 * padded to (size - size mod MP_PREC) + 2*MP_PREC digits (this arithmetic
 * assumes MP_PREC is a power of two), the buffer is reallocated and the new
 * digits are zeroed.
 *
 * Returns MP_OKAY, or MP_MEM if the reallocation fails; in that case a is
 * left completely unchanged and still owns its old buffer.
 */
static int mp_grow(mp_int *a, int size)
{
  int i, n;
  mp_digit *tmp;
  REGFUNC("mp_grow");
  VERIFY(a);
  /* if the alloc size is smaller alloc more ram */
  if (a->alloc < size) {
    /* ensure there are always spare digits on top */
    size += (MP_PREC*2) - (size & (MP_PREC-1));
    /* keep the old pointer until realloc has succeeded: assigning the
     * result straight to a->dp would lose (and leak) the buffer on failure */
    tmp = realloc(a->dp, sizeof(mp_digit)*size);
    if (tmp == NULL) {
      DECFUNC();
      return MP_MEM;
    }
    a->dp = tmp;
    /* zero the freshly added digits */
    n = a->alloc;
    a->alloc = size;
    for (i = n; i < a->alloc; i++) {
      a->dp[i] = 0;
    }
  }
  DECFUNC();
  return MP_OKAY;
}
/* Shrink the digit buffer of a to exactly the digits in use.
 *
 * Nothing is done when the buffer already has that size or when no digit is
 * in use (a zero-sized realloc has implementation-defined behaviour and
 * could leave the number without a buffer).
 *
 * Returns MP_OKAY, or MP_MEM if the reallocation fails; in that case a is
 * left unchanged and still owns its old buffer.
 */
int mp_shrink(mp_int *a)
{
  mp_digit *tmp;
  REGFUNC("mp_shrink");
  VERIFY(a);
  if (a->alloc != a->used && a->used > 0) {
    /* do not overwrite a->dp before the reallocation is known to have worked */
    tmp = realloc(a->dp, sizeof(mp_digit) * a->used);
    if (tmp == NULL) {
      DECFUNC();
      return MP_MEM;
    }
    a->dp = tmp;
    a->alloc = a->used;
  }
  DECFUNC();
  return MP_OKAY;
}
/* Trim unused digits: lower a->used past every leading (most significant)
 * zero digit.  A number that ends up with no digits is zero and is given a
 * positive sign. */
static void mp_clamp(mp_int *a)
{
  int top;
  REGFUNC("mp_clamp");
  VERIFY(a);
  top = a->used;
  while (top > 0 && a->dp[top - 1] == 0) {
    --top;
  }
  a->used = top;
  if (top == 0) {
    a->sign = MP_ZPOS;
  }
  DECFUNC();
}
/* Set a to zero: clear the whole allocated digit buffer, mark no digit as
 * used and make the sign positive. */
void mp_zero(mp_int *a)
{
  REGFUNC("mp_zero");
  VERIFY(a);
  memset(a->dp, 0, sizeof(mp_digit) * a->alloc);
  a->used = 0;
  a->sign = MP_ZPOS;
  DECFUNC();
}
/* Set a to the single digit b.  Only the bits of b selected by MP_MASK are
 * kept; if they are all zero the result is zero with no digit in use. */
void mp_set(mp_int *a, mp_digit b)
{
  mp_digit low;
  REGFUNC("mp_set");
  VERIFY(a);
  mp_zero(a);
  low = b & MP_MASK;
  a->dp[0] = low;
  if (low != 0) {
    a->used = 1;
  } else {
    a->used = 0;
  }
  DECFUNC();
}
/* Set a to a 32-bit constant taken from the low 32 bits of b.
 *
 * The value is fed in four bits at a time, most significant nibble first,
 * which keeps the code independent of small digit sizes (e.g.
 * DIGIT_BIT == 7).
 * Returns MP_OKAY, or the error reported by mp_mul_2d. */
int mp_set_int(mp_int *a, unsigned long b)
{
  int nibble, res;
  REGFUNC("mp_set_int");
  VERIFY(a);
  mp_zero(a);
  for (nibble = 0; nibble != 8; ++nibble) {
    /* make room: a = a * 16 */
    res = mp_mul_2d(a, 4, a);
    if (res != MP_OKAY) {
      DECFUNC();
      return res;
    }
    /* bring in bits 28..31 of the source ... */
    a->dp[0] |= (b>>28)&15;
    /* ... and move the next four bits into that position */
    b <<= 4;
    /* over-estimate used so that the digits just written are not ignored;
     * mp_clamp() trims the excess again */
    a->used += 32/DIGIT_BIT + 1;
  }
  mp_clamp(a);
  DECFUNC();
  return MP_OKAY;
}
/* Initialise a with room for at least size digits.
 *
 * The request is padded in the same way mp_grow() pads it, so there are
 * always spare zero digits on top.  The new number is a non-negative zero.
 * Returns MP_OKAY, or MP_MEM if the allocation fails. */
int mp_init_size(mp_int *a, int size)
{
  int padded;
  REGFUNC("mp_init_size");
  /* pad up so there are always extra zero digits on top */
  padded = size + ((MP_PREC*2) - (size & (MP_PREC-1)));
  a->dp = calloc(sizeof(mp_digit), padded);
  if (a->dp == NULL) {
    DECFUNC();
    return MP_MEM;
  }
  a->sign = MP_ZPOS;
  a->alloc = padded;
  a->used = 0;
  DECFUNC();
  return MP_OKAY;
}
/* copy, b = a
 *
 * Does nothing when both arguments are the same number or share the same
 * digit buffer.  Otherwise b is grown to hold a->used digits, receives the
 * digits, digit count and sign of a, and all of its remaining allocated
 * digits are cleared.
 * Returns MP_OKAY, or the error from mp_grow. */
int mp_copy(mp_int *a, mp_int *b)
{
  int res, n;
  REGFUNC("mp_copy");
  VERIFY(a);
  VERIFY(b);
  res = MP_OKAY;
  if (a != b && a->dp != b->dp) {
    /* make sure the destination is large enough */
    res = mp_grow(b, a->used);
    if (res == MP_OKAY) {
      /* copy the digits in use ... */
      for (n = 0; n < a->used; n++) {
        b->dp[n] = a->dp[n];
      }
      /* ... and clear everything above them */
      for (; n < b->alloc; n++) {
        b->dp[n] = 0;
      }
      b->used = a->used;
      b->sign = a->sign;
    }
  }
  DECFUNC();
  return res;
}
/* creates "a" then copies b into it
 *
 * Returns MP_OKAY on success.  On failure the error code of mp_init or
 * mp_copy is returned and a owns no memory: if the copy fails after a was
 * initialised, a is cleared again instead of being leaked. */
int mp_init_copy(mp_int *a, mp_int *b)
{
  int res;
  REGFUNC("mp_init_copy");
  VERIFY(b);
  if ((res = mp_init(a)) != MP_OKAY) {
    DECFUNC();
    return res;
  }
  if ((res = mp_copy(b, a)) != MP_OKAY) {
    /* do not hand a half-constructed number back to the caller */
    mp_clear(a);
  }
  DECFUNC();
  return res;
}
/* b = |a|: copy a into b and force the sign of b to positive.
 * Returns MP_OKAY, or the error from mp_copy (b->sign is then untouched). */
int mp_abs(mp_int *a, mp_int *b)
{
  int res;
  REGFUNC("mp_abs");
  VERIFY(a);
  VERIFY(b);
  res = mp_copy(a, b);
  if (res == MP_OKAY) {
    b->sign = MP_ZPOS;
  }
  DECFUNC();
  return res;
}
/* b = -a
 *
 * Copies a into b and flips the sign.  Zero is kept positive: a "negative
 * zero" would break the sign-based shortcuts of mp_cmp().
 * Returns MP_OKAY, or the error from mp_copy. */
int mp_neg(mp_int *a, mp_int *b)
{
  int res;
  REGFUNC("mp_neg");
  VERIFY(a);
  VERIFY(b);
  if ((res = mp_copy(a, b)) != MP_OKAY) {
    DECFUNC();
    return res;
  }
  if (b->used == 0) {
    /* -0 == 0 */
    b->sign = MP_ZPOS;
  } else {
    b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS;
  }
  DECFUNC();
  return MP_OKAY;
}
/* compare magnitude of two ints (unsigned)
 *
 * Returns MP_GT, MP_LT or MP_EQ for |a| > |b|, |a| < |b| and |a| == |b|.
 * The number with more digits in use is the larger one; with equal counts
 * the digits are compared from the most significant one downwards. */
int mp_cmp_mag(mp_int *a, mp_int *b)
{
  int n, result;
  REGFUNC("mp_cmp_mag");
  VERIFY(a);
  VERIFY(b);
  if (a->used != b->used) {
    /* different lengths decide immediately */
    result = (a->used > b->used) ? MP_GT : MP_LT;
  } else {
    result = MP_EQ;
    for (n = a->used - 1; n >= 0; n--) {
      if (a->dp[n] != b->dp[n]) {
        /* the first differing digit from the top decides */
        result = (a->dp[n] > b->dp[n]) ? MP_GT : MP_LT;
        break;
      }
    }
  }
  DECFUNC();
  return result;
}
/* compare two ints (signed)
 *
 * Returns MP_LT, MP_EQ or MP_GT for a < b, a == b and a > b.
 * Numbers of different sign are ordered by sign alone.  With equal signs
 * the magnitudes decide, and for two negative numbers the magnitude
 * comparison is reversed (the larger magnitude is the smaller number). */
int mp_cmp(mp_int *a, mp_int *b)
{
  int res;
  REGFUNC("mp_cmp");
  VERIFY(a);
  VERIFY(b);
  /* compare based on sign */
  if (a->sign != b->sign) {
    res = (a->sign == MP_NEG) ? MP_LT : MP_GT;
  } else if (a->sign == MP_NEG) {
    /* both negative: -5 < -3 although |5| > |3| */
    res = mp_cmp_mag(b, a);
  } else {
    res = mp_cmp_mag(a, b);
  }
  DECFUNC();
  return res;
}
/* compare a digit
 *
 * Compares a against the single digit b and returns MP_LT, MP_EQ or MP_GT.
 * Any negative a is smaller, any a with more than one digit in use is
 * larger; otherwise the lowest digit of a is compared with b. */
int mp_cmp_d(mp_int *a, mp_digit b)
{
  int result;
  REGFUNC("mp_cmp_d");
  VERIFY(a);
  if (a->sign == MP_NEG) {
    result = MP_LT;
  } else if (a->used > 1) {
    result = MP_GT;
  } else if (a->dp[0] > b) {
    result = MP_GT;
  } else if (a->dp[0] < b) {
    result = MP_LT;
  } else {
    result = MP_EQ;
  }
  DECFUNC();
  return result;
}
/* shift right a certain amount of digits
 *
 * Drops the b least significant digits of a in place.  b <= 0 leaves a
 * untouched; a shift by more digits than are in use sets a to zero. */
void mp_rshd(mp_int *a, int b)
{
  int x, keep;
  REGFUNC("mp_rshd");
  VERIFY(a);
  if (b > 0) {
    if (a->used < b) {
      /* everything is shifted out */
      mp_zero(a);
    } else {
      /* move the surviving digits down ... */
      keep = a->used - b;
      for (x = 0; x < keep; x++) {
        a->dp[x] = a->dp[x + b];
      }
      /* ... and clear the places they came from */
      for (x = keep; x < a->used; x++) {
        a->dp[x] = 0;
      }
      mp_clamp(a);
    }
  }
  DECFUNC();
}
/* shift left a certain amount of digits
 *
 * Inserts b zero digits below the digits of a, in place.  b <= 0 is a
 * no-op.  Returns MP_OKAY, or the error from mp_grow. */
int mp_lshd(mp_int *a, int b)
{
  int x, res;
  REGFUNC("mp_lshd");
  VERIFY(a);
  if (b <= 0) {
    DECFUNC();
    return MP_OKAY;
  }
  /* make room for the extra digits */
  res = mp_grow(a, a->used + b);
  if (res != MP_OKAY) {
    DECFUNC();
    return res;
  }
  /* move the digits up, starting at the top so nothing is overwritten
   * before it has been read */
  a->used += b;
  x = a->used;
  while (x-- > b) {
    a->dp[x] = a->dp[x - b];
  }
  /* the vacated low digits become zero */
  for (x = 0; x < b; x++) {
    a->dp[x] = 0;
  }
  mp_clamp(a);
  DECFUNC();
  return MP_OKAY;
}
/* calc a value mod 2^b
 *
 * c receives a with everything from bit b upwards cleared (the sign is
 * copied from a).  b <= 0 gives zero.  When b covers at least all digits of
 * a in use the result is simply a copy of a.
 * Returns MP_OKAY, or the error from mp_copy. */
int mp_mod_2d(mp_int *a, int b, mp_int *c)
{
  int x, res;
  REGFUNC("mp_mod_2d");
  VERIFY(a);
  VERIFY(c);
  /* if b is <= 0 then zero the int */
  if (b <= 0) {
    mp_zero(c);
    DECFUNC();
    return MP_OKAY;
  }
  /* if the modulus is at least as large as the value then just copy.
   * ">=" matters: with b == used*DIGIT_BIT the masking below would touch
   * dp[used], which does not exist when alloc == used (e.g. after
   * mp_shrink) */
  if (b >= (int)(a->used * DIGIT_BIT)) {
    res = mp_copy(a, c);
    DECFUNC();
    return res;
  }
  /* copy */
  if ((res = mp_copy(a, c)) != MP_OKAY) {
    DECFUNC();
    return res;
  }
  /* zero digits above the last digit of the modulus */
  for (x = (b/DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
    c->dp[x] = 0;
  }
  /* clear the digit that is not completely outside/inside the modulus */
  c->dp[b/DIGIT_BIT] &= (mp_digit)((((mp_digit)1)<<(((mp_digit)b) % DIGIT_BIT)) - ((mp_digit)1));
  mp_clamp(c);
  DECFUNC();
  return MP_OKAY;
}
/* shift right by a certain bit count (store quotient in c, remainder in d)
 *
 *   a - value to shift
 *   b - number of bits; b <= 0 copies a to c and zeroes d
 *   c - receives a >> b (on the magnitude; the sign is copied from a)
 *   d - receives a mod 2^b as computed by mp_mod_2d, may be NULL
 *
 * Returns MP_OKAY, or the error from mp_init / mp_mod_2d / mp_copy.
 */
int mp_div_2d(mp_int *a, int b, mp_int *c, mp_int *d)
{
  mp_digit D, r, rr, mask;
  int x, res;
  mp_int t;
  REGFUNC("mp_div_2d");
  VERIFY(a);
  VERIFY(c);
  if (d != NULL) { VERIFY(d); }
  /* if the shift count is <= 0 then we do no work */
  if (b <= 0) {
    res = mp_copy(a, c);
    if (d != NULL) { mp_zero(d); }
    DECFUNC();
    return res;
  }
  /* t holds the remainder until the very end, so d may alias a or c */
  if ((res = mp_init(&t)) != MP_OKAY) {
    DECFUNC();
    return res;
  }
  /* get the remainder */
  if (d != NULL) {
    if ((res = mp_mod_2d(a, b, &t)) != MP_OKAY) {
      mp_clear(&t);
      DECFUNC();
      return res;
    }
  }
  /* copy */
  if ((res = mp_copy(a, c)) != MP_OKAY) {
    mp_clear(&t);
    DECFUNC();
    return res;
  }
  /* shift by as many digits in the bit count */
  mp_rshd(c, b/DIGIT_BIT);
  /* shift any bit count < DIGIT_BIT */
  D = (mp_digit)(b % DIGIT_BIT);
  if (D != 0) {
    /* mask of the D low bits, built in mp_digit width (as mp_mod_2d does)
     * so it stays correct if a digit is wider than unsigned int */
    mask = (((mp_digit)1) << D) - ((mp_digit)1);
    r = 0;
    for (x = c->used - 1; x >= 0; x--) {
      /* get the lower bits of this word in a temp */
      rr = c->dp[x] & mask;
      /* shift the current word and mix in the carry bits from the previous word */
      c->dp[x] = (c->dp[x] >> D) | (r << (DIGIT_BIT-D));
      /* set the carry to the carry bits of the current word found above */
      r = rr;
    }
  }
  mp_clamp(c);
  if (d != NULL) {
    mp_exch(&t, d);
  }
  mp_clear(&t);
  DECFUNC();
  return MP_OKAY;
}
/* shift left by a certain bit count
 *
 * c = a * 2^b.  Whole digits are moved with mp_lshd, the remaining
 * b mod DIGIT_BIT bits are shifted digit by digit with carry.
 * Returns MP_OKAY, or the error from mp_copy / mp_grow / mp_lshd.
 * NOTE(review): a negative b is not rejected here -- confirm callers never
 * pass one. */
int mp_mul_2d(mp_int *a, int b, mp_int *c)
{
  mp_digit d, r, rr, mask;
  int x, res;
  REGFUNC("mp_mul_2d");
  VERIFY(a);
  VERIFY(c);
  /* copy */
  if ((res = mp_copy(a, c)) != MP_OKAY) {
    DECFUNC();
    return res;
  }
  /* room for the whole-digit shift plus one digit for the bit carry */
  if ((res = mp_grow(c, c->used + b/DIGIT_BIT + 1)) != MP_OKAY) {
    DECFUNC();
    return res;
  }
  /* shift by as many digits in the bit count */
  if ((res = mp_lshd(c, b/DIGIT_BIT)) != MP_OKAY) {
    DECFUNC();
    return res;
  }
  /* treat every allocated digit as used so the carry out of the top digit
   * has somewhere to go; mp_clamp() trims the zeros again */
  c->used = c->alloc;
  /* shift any bit count < DIGIT_BIT */
  d = (mp_digit)(b % DIGIT_BIT);
  if (d != 0) {
    /* mask of the d low bits, built in mp_digit width (as mp_mod_2d does)
     * so it stays correct if a digit is wider than unsigned int */
    mask = (((mp_digit)1) << d) - ((mp_digit)1);
    r = 0;
    for (x = 0; x < c->used; x++) {
      /* get the higher bits of the current word */
      rr = (c->dp[x] >> (DIGIT_BIT - d)) & mask;
      /* shift the current word and OR in the carry */
      c->dp[x] = ((c->dp[x] << d) | r) & MP_MASK;
      /* set the carry to the carry bits of the current word */
      r = rr;
    }
  }
  mp_clamp(c);
  DECFUNC();
  return MP_OKAY;
}
/* b = a/2: copy a into b and shift the magnitude right by one bit.
 * Returns MP_OKAY, or the error from mp_copy. */
int mp_div_2(mp_int *a, mp_int *b)
{
  mp_digit carry, low;
  int x, res;
  REGFUNC("mp_div_2");
  VERIFY(a);
  VERIFY(b);
  res = mp_copy(a, b);
  if (res == MP_OKAY) {
    /* walk from the top digit down; the bit shifted out of one digit
     * becomes the top bit of the digit below it */
    carry = 0;
    for (x = b->used; x-- > 0; ) {
      low = b->dp[x] & 1;
      b->dp[x] = (b->dp[x] >> 1) | (carry << (DIGIT_BIT-1));
      carry = low;
    }
    mp_clamp(b);
  }
  DECFUNC();
  return res;
}
/* b = a*2: copy a into b and shift the magnitude left by one bit.
 * Returns MP_OKAY, or the error from mp_copy / mp_grow. */
int mp_mul_2(mp_int *a, mp_int *b)
{
  mp_digit carry, top;
  int x, res;
  REGFUNC("mp_mul_2");
  VERIFY(a);
  VERIFY(b);
  /* copy, then make sure there is a digit for the carry out of the top */
  if ((res = mp_copy(a, b)) == MP_OKAY &&
      (res = mp_grow(b, b->used + 1)) == MP_OKAY) {
    /* process every allocated digit; mp_clamp() trims the zeros again */
    b->used = b->alloc;
    carry = 0;
    for (x = 0; x < b->used; x++) {
      /* the top bit of this digit moves into the next one */
      top = (b->dp[x] >> (DIGIT_BIT - 1)) & 1;
      b->dp[x] = ((b->dp[x] << 1) | carry) & MP_MASK;
      carry = top;
    }
    mp_clamp(b);
  }
  DECFUNC();
  return res;
}
/* low level addition, based on HAC pp.594, Algorithm 14.7
 *
 * c = |a| + |b|.  The signs of the inputs are ignored and c->sign is not
 * set here (apart from mp_clamp() making a zero result positive).
 * Returns MP_OKAY, or the error from mp_grow. */
static int s_mp_add(mp_int *a, mp_int *b, mp_int *c)
{
  mp_int *longer;
  int prev_used, res, shared, longest, i;
  mp_digit carry, sum;
  REGFUNC("s_mp_add");
  VERIFY(a);
  VERIFY(b);
  VERIFY(c);
  /* "longer" is the input with the most digits, "shared" the number of
   * digit positions present in both inputs */
  if (a->used >= b->used) {
    longer = a;
    shared = b->used;
  } else {
    longer = b;
    shared = a->used;
  }
  longest = longer->used;
  /* the sum needs at most one digit more than the longer input */
  if (c->alloc < longest + 1) {
    res = mp_grow(c, longest + 1);
    if (res != MP_OKAY) {
      DECFUNC();
      return res;
    }
  }
  prev_used = c->used;
  c->used = longest + 1;
  /* positions where both inputs have a digit */
  carry = 0;
  for (i = 0; i < shared; i++) {
    sum = a->dp[i] + b->dp[i] + carry;
    /* the bit just above the digit is the carry */
    carry = (sum >> DIGIT_BIT) & 1;
    c->dp[i] = sum & MP_MASK;
  }
  /* remaining digits of the longer input, still propagating the carry */
  for (; i < longest; i++) {
    sum = longer->dp[i] + carry;
    carry = (sum >> DIGIT_BIT) & 1;
    c->dp[i] = sum & MP_MASK;
  }
  /* final carry */
  c->dp[i] = carry;
  /* wipe digits left over from the previous value of c */
  for (i = c->used; i < prev_used; i++) {
    c->dp[i] = 0;
  }
  mp_clamp(c);
  DECFUNC();
  return MP_OKAY;
}
/* low level subtraction (assumes a > b), HAC pp.595 Algorithm 14.9
 *
 * c = |a| - |b|.  Signs are ignored and c->sign is not set here (apart from
 * mp_clamp() making a zero result positive).
 * Returns MP_OKAY, or the error from mp_grow. */
static int s_mp_sub(mp_int *a, mp_int *b, mp_int *c)
{
  int prev_used, res, low, high, i;
  mp_digit borrow, diff;
  REGFUNC("s_mp_sub");
  VERIFY(a);
  VERIFY(b);
  VERIFY(c);
  /* b supplies the low digit count, a the high one */
  low = b->used;
  high = a->used;
  /* the difference never needs more digits than a */
  if (c->alloc < high) {
    res = mp_grow(c, high);
    if (res != MP_OKAY) {
      DECFUNC();
      return res;
    }
  }
  prev_used = c->used;
  c->used = high;
  /* positions where both inputs have a digit */
  borrow = 0;
  for (i = 0; i < low; i++) {
    diff = a->dp[i] - (b->dp[i] + borrow);
    /* a wrapped difference has the bit above the digit set: the borrow */
    borrow = (diff >> DIGIT_BIT) & 1;
    c->dp[i] = diff & MP_MASK;
  }
  /* remaining digits of a, still propagating the borrow */
  for (; i < high; i++) {
    diff = a->dp[i] - borrow;
    borrow = (diff >> DIGIT_BIT) & 1;
    c->dp[i] = diff & MP_MASK;
  }
  /* wipe digits left over from the previous value of c */
  for (i = c->used; i < prev_used; i++) {
    c->dp[i] = 0;
  }
  mp_clamp(c);
  DECFUNC();
  return MP_OKAY;
}
/* low level multiplication: the product of a and b via s_mp_mul_digs,
 * asking for a->used + b->used + 1 output digits.  Being a macro, a and b
 * are each evaluated twice. */
#define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
/* Fast (comba) multiplier
 *
 * This is the fast column-array [comba] multiplier. It is designed to compute
 * the columns of the product first then handle the carries afterwards. This
 * has the effect of making the nested loops that compute the columns very
 * simple and schedulable on super-scalar processors.
 *
 * Computes the lowest "digs" digits of |a| * |b| into c; c->sign is not set
 * here.  Returns MP_OKAY, or the error from mp_grow.
 *
 * The columns live in a fixed 512-entry array on the stack and the last
 * column is read as W[digs-1], so the caller has to guarantee
 * 1 <= digs <= 512 (s_mp_mul_digs only comes here for digs < 512) and that
 * a column sum cannot overflow an mp_word.
 * NOTE(review): nothing in this function checks digs >= 1 -- confirm callers.
 */
static int fast_s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
{
int olduse, res, pa, ix;
mp_word W[512];
REGFUNC("fast_s_mp_mul_digs");
VERIFY(a);
VERIFY(b);
VERIFY(c);
/* make sure the destination can hold digs digits */
if (c->alloc < digs) {
if ((res = mp_grow(c, digs)) != MP_OKAY) {
DECFUNC();
return res;
}
}
/* clear temp buf (the columns) */
memset(W, 0, sizeof(mp_word) * digs);
/* calculate the columns */
pa = a->used;
for (ix = 0; ix < pa; ix++) {
/* this multiplier has been modified to allow you to control how many digits
 * of output are produced. So at most we want to make upto "digs" digits
 * of output
 */
/* this adds products to distinct columns (at ix+iy) of W
 * note that each step through the loop is not dependent on
 * the previous which means the compiler can easily unroll
 * the loop without scheduling problems
 */
{
register mp_digit tmpx, *tmpy;
register mp_word *_W;
register int iy, pb;
/* alias for the word on the left e.g. A[ix] * A[iy] */
tmpx = a->dp[ix];
/* alias for the right side */
tmpy = b->dp;
/* alias for the columns, each step through the loop adds a new
term to each column
*/
_W = W + ix;
/* the number of digits is limited by their placement. E.g.
we avoid multiplying digits that will end up above the # of
digits of precision requested
*/
pb = MIN(b->used, digs - ix);
for (iy = 0; iy < pb; iy++) {
*_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
}
}
}
/* setup dest (the column sums were all taken from a and b above, so c may
 * be the same number as a or b) */
olduse = c->used;
c->used = digs;
/* At this point W[] contains the sums of each column. To get the
 * correct result we must take the extra bits from each column and
 * carry them down
 *
 * Note that while this adds extra code to the multiplier it saves time
 * since the carry propagation is removed from the above nested loop.
 * This has the effect of reducing the work from N*(N+N*c)==N^2 + c*N^2 to
 * N^2 + N*c where c is the cost of the shifting. On very small numbers
 * this is slower but on most cryptographic size numbers it is faster.
 */
for (ix = 1; ix < digs; ix++) {
W[ix] += (W[ix-1] >> ((mp_word)DIGIT_BIT));
c->dp[ix-1] = (mp_digit)(W[ix-1] & ((mp_word)MP_MASK));
}
/* the last column keeps only its low digit; its carry is dropped */
c->dp[digs-1] = (mp_digit)(W[digs-1] & ((mp_word)MP_MASK));
/* clear unused */
for (ix = c->used; ix < olduse; ix++) {
c->dp[ix] = 0;
}
mp_clamp(c);
DECFUNC();
return MP_OKAY;
}
/* multiplies |a| * |b| and only computes upto digs digits of result
 * HAC pp. 595, Algorithm 14.12 Modified so you can control how many digits of
 * output are created.
 *
 * The work is handed to fast_s_mp_mul_digs() whenever digs is small enough;
 * otherwise the product is built in a temporary and exchanged into c, so c
 * may be the same number as a or b.  c->sign is not set to the product's
 * sign here.  Returns MP_OKAY or the error of the failing helper.
 */
static int s_mp_mul_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
{
  mp_int t;
  int res, pa, pb, ix, iy;
  mp_digit u;
  mp_word r;
  mp_digit tmpx;
  REGFUNC("s_mp_mul_digs");
  VERIFY(a);
  VERIFY(b);
  VERIFY(c);
  /* can we use the fast multiplier?
   *
   * The fast multiplier can be used if the output will have less than
   * 512 digits and the number of digits won't affect carry propagation
   */
  if ((digs < 512) && digs < (1<<( (CHAR_BIT*sizeof(mp_word)) - (2*DIGIT_BIT)))) {
    res = fast_s_mp_mul_digs(a,b,c,digs);
    DECFUNC();
    return res;
  }
  res = mp_init_size(&t, digs);
  if (res != MP_OKAY) {
    DECFUNC();
    return res;
  }
  t.used = digs;
  /* schoolbook multiplication, one row of partial products per digit of a */
  pa = a->used;
  for (ix = 0; ix < pa; ix++) {
    /* every row starts without a carry */
    u = 0;
    /* do not produce digits at position digs or above */
    pb = MIN(b->used, digs - ix);
    tmpx = a->dp[ix];
    for (iy = 0; iy < pb; iy++) {
      /* old column value + product + carry, in double precision */
      r = ((mp_word)t.dp[ix + iy]) + ((mp_word)tmpx) * ((mp_word)b->dp[iy]) + ((mp_word)u);
      /* low part stays in the column ... */
      t.dp[ix + iy] = (mp_digit)(r & ((mp_word)MP_MASK));
      /* ... high part is carried into the next one */
      u = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
    }
    /* store the last carry of the row if it is still inside the result */
    if (ix + iy < digs) {
      t.dp[ix + iy] = u;
    }
  }
  mp_clamp(&t);
  mp_exch(&t, c);
  mp_clear(&t);
  DECFUNC();
  return MP_OKAY;
}
/* this is a modified version of fast_s_mp_mul_digs that only produces
 * output digits *above* digs. See the comments for fast_s_mp_mul_digs
 * to see how it works.
 *
 * This is used in the Barrett reduction since for one of the multiplications
 * only the higher digits were needed. This essentially halves the work.
 *
 * Only the columns digs .. a->used + b->used are computed and stored in c;
 * c->dp[0 .. digs-1] is not written by this function and keeps whatever it
 * held before.  Returns MP_OKAY, or the error from mp_grow.
 *
 * NOTE(review): the code relies on preconditions it does not check:
 *   - a->used + b->used + 1 <= 512 (size of W) and digs <= a->used + b->used
 *     (otherwise the memset length below goes negative);
 *   - digs - ix must not be negative for any digit index ix of a, otherwise
 *     the read pointer starts before b->dp.
 * Confirm the callers guarantee these.
 */
static int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
{
int oldused, newused, res, pa, pb, ix;
mp_word W[512];
REGFUNC("fast_s_mp_mul_high_digs");
VERIFY(a);
VERIFY(b);
VERIFY(c);
/* the full product needs at most a->used + b->used + 1 digits */
newused = a->used + b->used + 1;
if (c->alloc < newused) {
if ((res = mp_grow(c, newused)) != MP_OKAY) {
DECFUNC();
return res;
}
}
/* like the other comba method we compute the columns first */
pa = a->used;
pb = b->used;
/* only the columns from digs upwards are used, so only those are cleared */
memset(&W[digs], 0, (pa + pb + 1 - digs) * sizeof(mp_word));
for (ix = 0; ix < pa; ix++) {
{
register mp_digit tmpx, *tmpy;
register int iy;
register mp_word *_W;
/* work todo, that is we only calculate digits that are at "digs" or above */
iy = digs - ix;
/* copy of word on the left of A[ix] * B[iy] */
tmpx = a->dp[ix];
/* alias for right side */
tmpy = b->dp + iy;
/* alias for the columns of output. Offset to be equal to or above the
 * smallest digit place requested
 */
_W = &(W[digs]);
/* compute column products for digits above the minimum */
for (; iy < pb; iy++) {
*_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
}
}
}
/* setup dest */
oldused = c->used;
c->used = newused;
/* now convert the array W downto what we need: carry each column into the
 * next one and keep its low digit */
for (ix = digs+1; ix < (pa+pb+1); ix++) {
W[ix] += (W[ix-1] >> ((mp_word)DIGIT_BIT));
c->dp[ix-1] = (mp_digit)(W[ix-1] & ((mp_word)MP_MASK));
}
/* top column: only its low digit is kept */
c->dp[(pa+pb+1)-1] = (mp_digit)(W[(pa+pb+1)-1] & ((mp_word)MP_MASK));
/* clear digits left over from the previous value of c */
for (ix = c->used; ix < oldused; ix++) {
c->dp[ix] = 0;
}
mp_clamp(c);
DECFUNC();
return MP_OKAY;
}
/* multiplies |a| * |b| and does not compute the lower digs digits
 * [meant to get the higher part of the product]
 *
 * Small operands are handed to fast_s_mp_mul_high_digs(); otherwise the
 * columns from digs upwards are accumulated in a zeroed temporary that is
 * exchanged into c at the end.  c->sign is not set to the product's sign
 * here.  Returns MP_OKAY or the error of the failing helper.
 */
static int s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs)
{
  mp_int t;
  int res, pa, pb, ix, iy, col;
  mp_digit u;
  mp_word r;
  mp_digit tmpx;
  REGFUNC("s_mp_mul_high_digs");
  VERIFY(a);
  VERIFY(b);
  VERIFY(c);
  /* can we use the fast multiplier? */
  if (((a->used + b->used + 1) < 512) && MAX(a->used, b->used) < (1<<( (CHAR_BIT*sizeof(mp_word)) - (2*DIGIT_BIT)))) {
    res = fast_s_mp_mul_high_digs(a,b,c,digs);
    DECFUNC();
    return res;
  }
  res = mp_init_size(&t, a->used + b->used + 1);
  if (res != MP_OKAY) {
    DECFUNC();
    return res;
  }
  t.used = a->used + b->used + 1;
  pa = a->used;
  pb = b->used;
  for (ix = 0; ix < pa; ix++) {
    /* every row starts without a carry */
    u = 0;
    /* left hand side of A[ix] * B[iy] */
    tmpx = a->dp[ix];
    /* the first product of the row that matters lands in column digs */
    col = digs;
    for (iy = digs - ix; iy < pb; iy++) {
      /* old column value + product + carry, in double precision */
      r = ((mp_word)t.dp[col]) + ((mp_word)tmpx) * ((mp_word)b->dp[iy]) + ((mp_word)u);
      /* low part stays in the column ... */
      t.dp[col++] = (mp_digit)(r & ((mp_word)MP_MASK));
      /* ... high part is carried into the next one */
      u = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
    }
    /* last carry of the row */
    t.dp[col] = u;
  }
  mp_clamp(&t);
  mp_exch(&t, c);
  mp_clear(&t);
  DECFUNC();
  return MP_OKAY;
}
/* fast squaring
 *
 * This is the comba method where the columns of the product are computed first
 * then the carries are computed. This has the effect of making a very simple
 * inner loop that is executed the most
 *
 * W2 represents the outer products and W the inner.
 *
 * A further optimization is made because the inner products are of the form
 * "A * B * 2". The *2 part does not need to be computed until the end which is
 * good because 64-bit shifts are slow!
 *
 * Computes b = a * a with a positive sign.  Returns MP_OKAY, or the error
 * from mp_grow.  Both column arrays have 512 entries, so 2*a->used + 1 must
 * not exceed that (s_mp_sqr only comes here when it is below 512).
 */
static int fast_s_mp_sqr(mp_int *a, mp_int *b)
{
int olduse, newused, res, ix, pa;
mp_word W2[512], W[512];
REGFUNC("fast_s_mp_sqr");
VERIFY(a);
VERIFY(b);
pa = a->used;
/* the square needs at most 2*pa + 1 digits */
newused = pa + pa + 1;
if (b->alloc < newused) {
if ((res = mp_grow(b, newused)) != MP_OKAY) {
DECFUNC();
return res;
}
}
/* zero temp buffer (columns) */
memset(W, 0, (pa+pa+1)*sizeof(mp_word));
memset(W2, 0, (pa+pa+1)*sizeof(mp_word));
for (ix = 0; ix < pa; ix++) {
/* compute the outer product (the square of digit ix, column 2*ix) */
W2[ix+ix] += ((mp_word)a->dp[ix]) * ((mp_word)a->dp[ix]);
{
register mp_digit tmpx, *tmpy;
register mp_word *_W;
register int iy;
/* copy of left side */
tmpx = a->dp[ix];
/* alias for right side */
tmpy = a->dp + (ix + 1);
/* the first inner product of this row, A[ix]*A[ix+1], belongs to column 2*ix+1 */
_W = &(W[ix+ix+1]);
/* inner products (each pair ix < iy once; they are doubled below) */
for (iy = ix + 1; iy < pa; iy++) {
*_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
}
}
}
/* double first value, since the inner products are half of what they should be */
W[0] += W[0] + W2[0];
/* setup dest (a->dp is not read any more from here on) */
olduse = b->used;
b->used = newused;
/* now compute digits */
for (ix = 1; ix < (pa+pa+1); ix++) {
/* double/add next digit */
W[ix] += W[ix] + W2[ix];
/* add the carry of the previous column, then emit that column's digit */
W[ix] = W[ix] + (W[ix-1] >> ((mp_word)DIGIT_BIT));
b->dp[ix-1] = (mp_digit)(W[ix-1] & ((mp_word)MP_MASK));
}
/* top column: only its low digit is kept */
b->dp[(pa+pa+1)-1] = (mp_digit)(W[(pa+pa+1)-1] & ((mp_word)MP_MASK));
/* clear high */
for (ix = b->used; ix < olduse; ix++) {
b->dp[ix] = 0;
}
/* fix the sign (since we no longer make a fresh temp) */
b->sign = MP_ZPOS;
mp_clamp(b);
DECFUNC();
return MP_OKAY;
}
/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16
 *
 * Small inputs are handed to fast_s_mp_sqr(); otherwise the square is built
 * in a zeroed temporary that is exchanged into b at the end.
 * Returns MP_OKAY or the error of the failing helper.
 */
static int s_mp_sqr(mp_int *a, mp_int *b)
{
mp_int t;
int res, ix, iy, pa;
mp_word r, u;
mp_digit tmpx, *tmpt;
REGFUNC("s_mp_sqr");
VERIFY(a);
VERIFY(b);
/* can we use the fast multiplier? */
if (((a->used * 2 + 1) < 512) && a->used < (1<<( (CHAR_BIT*sizeof(mp_word)) - (2*DIGIT_BIT) - 1))) {
res = fast_s_mp_sqr(a,b);
DECFUNC();
return res;
}
pa = a->used;
/* the square needs at most 2*pa + 1 digits */
if ((res = mp_init_size(&t, pa + pa + 1)) != MP_OKAY) {
DECFUNC();
return res;
}
t.used = pa + pa + 1;
for (ix = 0; ix < pa; ix++) {
/* first calculate the digit at 2*ix */
/* calculate double precision result */
r = ((mp_word)t.dp[ix+ix]) + ((mp_word)a->dp[ix]) * ((mp_word)a->dp[ix]);
/* store lower part in result */
t.dp[ix+ix] = (mp_digit)(r & ((mp_word)MP_MASK));
/* get the carry */
u = (r >> ((mp_word)DIGIT_BIT));
/* left hand side of A[ix] * A[iy] */
tmpx = a->dp[ix];
/* alias for where to store the results */
tmpt = &(t.dp[ix+ix+1]);
for (iy = ix + 1; iy < pa; iy++) {
/* first calculate the product */
r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
/* now calculate the double precision result, note we use
 * addition instead of *2 since its easier to optimize
 */
r = ((mp_word)*tmpt) + r + r + ((mp_word)u);
/* store lower part */
*tmpt++ = (mp_digit)(r & ((mp_word)MP_MASK));
/* get carry */
u = (r >> ((mp_word)DIGIT_BIT));
}
/* add the carry that is left after the row to the next digit */
r = ((mp_word)*tmpt) + u;
*tmpt = (mp_digit)(r & ((mp_word)MP_MASK));
u = (r >> ((mp_word)DIGIT_BIT));
/* propagate upwards: keep adding one to the following digits for as long
 * as a carry comes out */
++tmpt;
while (u != ((mp_word)0)) {
r = ((mp_word)*tmpt) + ((mp_word)1);
*tmpt++ = (mp_digit)(r & ((mp_word)MP_MASK));
u = (r >> ((mp_word)DIGIT_BIT));
}
}
mp_clamp(&t);
mp_exch(&t, b);
mp_clear(&t);
DECFUNC();
return MP_OKAY;
}
/* high level addition (handles signs)
 *
 * c = a + b.  Equal signs add the magnitudes; opposite signs subtract the
 * smaller magnitude from the larger one.  The signs are read before c is
 * written, so c may be the same number as a or b.
 * Returns the result of the low level routine that was used. */
int mp_add(mp_int *a, mp_int *b, mp_int *c)
{
  int sign_a, sign_b, status, sign_c;
  REGFUNC("mp_add");
  VERIFY(a);
  VERIFY(b);
  VERIFY(c);
  sign_a = a->sign;
  sign_b = b->sign;
  if (sign_a == MP_ZPOS && sign_b == MP_NEG) {
    /* a + -b == a - b, but if b>a then we do it as -(b-a) */
    if (mp_cmp_mag(a, b) == MP_LT) {
      status = s_mp_sub(b, a, c);
      sign_c = MP_NEG;
    } else {
      status = s_mp_sub(a, b, c);
      sign_c = MP_ZPOS;
    }
  } else if (sign_a == MP_NEG && sign_b == MP_ZPOS) {
    /* -a + b == b - a, but if a>b then we do it as -(a-b) */
    if (mp_cmp_mag(a, b) == MP_GT) {
      status = s_mp_sub(a, b, c);
      sign_c = MP_NEG;
    } else {
      status = s_mp_sub(b, a, c);
      sign_c = MP_ZPOS;
    }
  } else {
    /* same direction: a + b, or -a + -b == -(a + b) */
    status = s_mp_add(a, b, c);
    sign_c = (sign_a == MP_ZPOS && sign_b == MP_ZPOS) ? MP_ZPOS : MP_NEG;
  }
  c->sign = sign_c;
  DECFUNC();
  return status;
}
/* high level subtraction (handles signs)
 *
 * c = a - b.  Opposite signs add the magnitudes; equal signs subtract the
 * smaller magnitude from the larger one.  The signs are read before c is
 * written, so c may be the same number as a or b.
 * Returns the result of the low level routine that was used. */
int mp_sub(mp_int *a, mp_int *b, mp_int *c)
{
  int sign_a, sign_b, status, sign_c;
  REGFUNC("mp_sub");
  VERIFY(a);
  VERIFY(b);
  VERIFY(c);
  sign_a = a->sign;
  sign_b = b->sign;
  if (sign_a == MP_ZPOS && sign_b == MP_ZPOS) {
    /* both positive, a - b, but if b>a then we do -(b - a) */
    if (mp_cmp_mag(a, b) == MP_LT) {
      status = s_mp_sub(b, a, c);
      sign_c = MP_NEG;
    } else {
      status = s_mp_sub(a, b, c);
      sign_c = MP_ZPOS;
    }
  } else if (sign_a == MP_ZPOS && sign_b == MP_NEG) {
    /* a - -b == a + b */
    status = s_mp_add(a, b, c);
    sign_c = MP_ZPOS;
  } else if (sign_a == MP_NEG && sign_b == MP_ZPOS) {
    /* -a - b == -(a + b) */
    status = s_mp_add(a, b, c);
    sign_c = MP_NEG;
  } else {
    /* -a - -b == b - a, but if a>b == -(a - b) */
    if (mp_cmp_mag(a, b) == MP_GT) {
      status = s_mp_sub(a, b, c);
      sign_c = MP_NEG;
    } else {
      status = s_mp_sub(b, a, c);
      sign_c = MP_ZPOS;
    }
  }
  c->sign = sign_c;
  DECFUNC();
  return status;
}
/* c = |a| * |b| using Karatsuba Multiplication
 *
 * Both inputs are split at B digits (half the digit count of the shorter
 * one) into a low part x0/y0 and a high part x1/y1.  The product is then
 * assembled from three multiplications:
 *
 *   x1y1 << 2B  +  (x0y0 + x1y1 - (x1-x0)*(y1-y0)) << B  +  x0y0
 *
 * (shifts counted in digits).  The sub-products are computed with mp_mul().
 * The pieces are given the signs of a and b; mp_mul() sets c->sign itself
 * after this function returns.
 *
 * Returns MP_OKAY on success and MP_MEM if any step fails (the individual
 * error codes are not passed on).  All temporaries are released through the
 * ladder of labels at the end: each label clears one temporary and falls
 * through to the labels for the ones initialised before it.
 */
static int mp_karatsuba_mul(mp_int *a, mp_int *b, mp_int *c)
{
mp_int x0, x1, y0, y1, t1, t2, x0y0, x1y1;
int B, err, x;
REGFUNC("mp_karatsuba_mul");
VERIFY(a);
VERIFY(b);
VERIFY(c);
/* assume failure until the very last step has succeeded */
err = MP_MEM;
/* min # of digits */
B = MIN(a->used, b->used);
/* now divide in two */
B = B/2;
/* init all the temps that receive the halves of a and b */
if (mp_init_size(&x0, B) != MP_OKAY) goto ERR;
if (mp_init_size(&x1, a->used - B) != MP_OKAY) goto X0;
if (mp_init_size(&y0, B) != MP_OKAY) goto X1;
if (mp_init_size(&y1, b->used - B) != MP_OKAY) goto Y0;
/* init temps */
if (mp_init(&t1) != MP_OKAY) goto Y1;
if (mp_init(&t2) != MP_OKAY) goto T1;
if (mp_init(&x0y0) != MP_OKAY) goto T2;
if (mp_init(&x1y1) != MP_OKAY) goto X0Y0;
/* now shift the digits: the low B digits go to x0/y0, the rest to x1/y1 */
x0.sign = x1.sign = a->sign;
y0.sign = y1.sign = b->sign;
x0.used = y0.used = B;
x1.used = a->used - B;
y1.used = b->used - B;
for (x = 0; x < B; x++) {
x0.dp[x] = a->dp[x];
y0.dp[x] = b->dp[x];
}
for (x = B; x < a->used; x++) {
x1.dp[x-B] = a->dp[x];
}
for (x = B; x < b->used; x++) {
y1.dp[x-B] = b->dp[x];
}
/* the low halves may have leading zero digits */
mp_clamp(&x0);
mp_clamp(&x1);
mp_clamp(&y0);
mp_clamp(&y1);
/* now calc the products x0y0 and x1y1 */
if (mp_mul(&x0, &y0, &x0y0) != MP_OKAY) goto X1Y1; /* x0y0 = x0*y0 */
if (mp_mul(&x1, &y1, &x1y1) != MP_OKAY) goto X1Y1; /* x1y1 = x1*y1 */
/* now calc x1-x0 and y1-y0 */
if (mp_sub(&x1, &x0, &t1) != MP_OKAY) goto X1Y1; /* t1 = x1 - x0 */
if (mp_sub(&y1, &y0, &t2) != MP_OKAY) goto X1Y1; /* t2 = y1 - y0 */
if (mp_mul(&t1, &t2, &t1) != MP_OKAY) goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */
/* add x0y0 */
if (mp_add(&x0y0, &x1y1, &t2) != MP_OKAY) goto X1Y1; /* t2 = x0y0 + x1y1 */
if (mp_sub(&t2, &t1, &t1) != MP_OKAY) goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
/* shift by B */
if (mp_lshd(&t1, B) != MP_OKAY) goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
if (mp_lshd(&x1y1, B*2) != MP_OKAY) goto X1Y1; /* x1y1 = x1y1 << 2*B */
if (mp_add(&x0y0, &t1, &t1) != MP_OKAY) goto X1Y1; /* t1 = x0y0 + t1 */
if (mp_add(&t1, &x1y1, c) != MP_OKAY) goto X1Y1; /* c = t1 + x1y1, the final product */
err = MP_OKAY;
/* cleanup ladder, also run on success */
X1Y1: mp_clear(&x1y1);
X0Y0: mp_clear(&x0y0);
T2 : mp_clear(&t2);
T1 : mp_clear(&t1);
Y1 : mp_clear(&y1);
Y0 : mp_clear(&y0);
X1 : mp_clear(&x1);
X0 : mp_clear(&x0);
ERR :
DECFUNC();
return err;
}
/* high level multiplication (handles sign)
 *
 * c = a * b.  Karatsuba multiplication is used when both operands have
 * more than KARATSUBA_MUL_CUTOFF digits, the low level multiplier
 * otherwise.  The sign of the result is worked out before the
 * multiplication, so c may be the same number as a or b.
 * A zero product is always given a positive sign.
 * Returns the result code of the multiplier that was used. */
int mp_mul(mp_int *a, mp_int *b, mp_int *c)
{
  int res, neg;
  REGFUNC("mp_mul");
  VERIFY(a);
  VERIFY(b);
  VERIFY(c);
  /* equal signs give a positive product, different signs a negative one */
  neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
  if (MIN(a->used, b->used) > KARATSUBA_MUL_CUTOFF) {
    res = mp_karatsuba_mul(a, b, c);
  } else {
    res = s_mp_mul(a, b, c);
  }
  /* never produce a "negative zero" (e.g. -3 * 0) */
  c->sign = (c->used > 0) ? neg : MP_ZPOS;
  DECFUNC();
  return res;
}
/* Karatsuba squaring, computes b = a*a
 *
 * a is cut at half = a->used/2 digits into a low part (lo) and a high part
 * (hi) and the square is rebuilt from three smaller squarings:
 *
 *    a*a = hi^2 * base^(2*half) + (lo^2 + hi^2 - (hi - lo)^2) * base^half + lo^2
 *
 * Every failure, whatever its cause, is reported as MP_MEM.
 */
static int mp_karatsuba_sqr(mp_int *a, mp_int *b)
{
   mp_int lo, hi, mid, sum, lo_sq, hi_sq;
   int half, status, ix;
   REGFUNC("mp_karatsuba_sqr");
   VERIFY(a);
   VERIFY(b);

   /* stays MP_MEM until the last step has succeeded */
   status = MP_MEM;

   /* split point: half of the digits of a */
   half = a->used / 2;

   /* storage for the two halves */
   if (mp_init_size(&lo, half) != MP_OKAY) {
      goto DONE;
   }
   if (mp_init_size(&hi, a->used - half) != MP_OKAY) {
      goto FREE_LO;
   }

   /* scratch values */
   if (mp_init(&mid) != MP_OKAY) {
      goto FREE_HI;
   }
   if (mp_init(&sum) != MP_OKAY) {
      goto FREE_MID;
   }
   if (mp_init(&lo_sq) != MP_OKAY) {
      goto FREE_SUM;
   }
   if (mp_init(&hi_sq) != MP_OKAY) {
      goto FREE_LO_SQ;
   }

   /* hand the digits of a out to the two halves */
   for (ix = 0; ix < a->used; ix++) {
      if (ix < half) {
         lo.dp[ix] = a->dp[ix];
      } else {
         hi.dp[ix - half] = a->dp[ix];
      }
   }
   lo.used = half;
   hi.used = a->used - half;
   mp_clamp(&lo);
   mp_clamp(&hi);

   /* lo_sq = lo^2, hi_sq = hi^2 */
   if (mp_sqr(&lo, &lo_sq) != MP_OKAY) {
      goto FREE_ALL;
   }
   if (mp_sqr(&hi, &hi_sq) != MP_OKAY) {
      goto FREE_ALL;
   }

   /* mid = (hi - lo)^2 */
   if (mp_sub(&hi, &lo, &mid) != MP_OKAY) {
      goto FREE_ALL;
   }
   if (mp_sqr(&mid, &mid) != MP_OKAY) {
      goto FREE_ALL;
   }

   /* mid = lo^2 + hi^2 - (hi - lo)^2 */
   if (mp_add(&lo_sq, &hi_sq, &sum) != MP_OKAY) {
      goto FREE_ALL;
   }
   if (mp_sub(&sum, &mid, &mid) != MP_OKAY) {
      goto FREE_ALL;
   }

   /* move the middle and high terms into place and add the three terms up */
   if (mp_lshd(&mid, half) != MP_OKAY) {
      goto FREE_ALL;
   }
   if (mp_lshd(&hi_sq, half * 2) != MP_OKAY) {
      goto FREE_ALL;
   }
   if (mp_add(&lo_sq, &mid, &mid) != MP_OKAY) {
      goto FREE_ALL;
   }
   if (mp_add(&mid, &hi_sq, b) != MP_OKAY) {
      goto FREE_ALL;
   }
   status = MP_OKAY;

FREE_ALL:
   mp_clear(&hi_sq);
FREE_LO_SQ:
   mp_clear(&lo_sq);
FREE_SUM:
   mp_clear(&sum);
FREE_MID:
   mp_clear(&mid);
FREE_HI:
   mp_clear(&hi);
FREE_LO:
   mp_clear(&lo);
DONE:
   DECFUNC();
   return status;
}
/* computes b = a*a
 *
 * Inputs with more than KARATSUBA_SQR_CUTOFF digits are squared with
 * mp_karatsuba_sqr(), the rest with s_mp_sqr().  The result is always
 * marked non-negative; the status of the chosen routine is returned.
 */
int mp_sqr(mp_int *a, mp_int *b)
{
   int res;
   REGFUNC("mp_sqr");
   VERIFY(a);
   VERIFY(b);
   res = (a->used > KARATSUBA_SQR_CUTOFF) ? mp_karatsuba_sqr(a, b)
                                          : s_mp_sqr(a, b);
   b->sign = MP_ZPOS;
   DECFUNC();
   return res;
}
/* integer signed division. c*b + d == a [e.g. a/b, c=quotient, d=remainder]
 * HAC pp.598 Algorithm 14.20
 *
 * Note that the description in HAC is horribly incomplete.  For example,
 * it doesn't consider the case where digits are removed from 'x' in the inner
 * loop.  It also doesn't consider the case that y has fewer than three digits, etc..
 *
 * The overall algorithm is as described as 14.20 from HAC but fixed to treat these cases.
 *
 * Either of c and d may be NULL when that result is not wanted.  The quotient
 * is negative when a and b have different signs.  A non-zero remainder takes
 * the sign of a; a zero remainder is always stored as non-negative.
 *
 * Returns MP_VAL when b is zero, otherwise MP_OKAY or the status of the
 * helper that failed.
 */
int mp_div(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
{
   mp_int q, x, y, t1, t2;
   int res, n, t, i, norm, neg;
   REGFUNC("mp_div");
   VERIFY(a);
   VERIFY(b);
   if (c != NULL) { VERIFY(c); }
   if (d != NULL) { VERIFY(d); }

   /* is divisor zero ? */
   if (mp_iszero(b) == 1) {
      DECFUNC();
      return MP_VAL;
   }

   /* if |a| < |b| then q=0, r = a */
   if (mp_cmp_mag(a, b) == MP_LT) {
      if (d != NULL) {
         res = mp_copy(a, d);
      } else {
         res = MP_OKAY;
      }
      if (c != NULL) {
         mp_zero(c);
      }
      DECFUNC();
      return res;
   }

   if ((res = mp_init_size(&q, a->used + 2)) != MP_OKAY) {
      DECFUNC();
      return res;
   }
   q.used = a->used + 2;

   if ((res = mp_init(&t1)) != MP_OKAY) {
      goto __Q;
   }
   if ((res = mp_init(&t2)) != MP_OKAY) {
      goto __T1;
   }
   if ((res = mp_init_copy(&x, a)) != MP_OKAY) {
      goto __T2;
   }
   if ((res = mp_init_copy(&y, b)) != MP_OKAY) {
      goto __X;
   }

   /* fix the sign: the algorithm itself works on magnitudes */
   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
   x.sign = y.sign = MP_ZPOS;

   /* normalize both x and y, ensure that y >= b/2, [b == 2^DIGIT_BIT] */
   norm = 0;
   while ((y.dp[y.used-1] & (((mp_digit)1)<<(DIGIT_BIT-1))) == ((mp_digit)0)) {
      ++norm;
      if ((res = mp_mul_2d(&x, 1, &x)) != MP_OKAY) {
         goto __Y;
      }
      if ((res = mp_mul_2d(&y, 1, &y)) != MP_OKAY) {
         goto __Y;
      }
   }

   /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
   n = x.used - 1;
   t = y.used - 1;

   /* step 2. while (x >= y*b^n-t) do { q[n-t] += 1; x -= y*b^{n-t} } */
   if ((res = mp_lshd(&y, n - t)) != MP_OKAY) {   /* y = y*b^{n-t} */
      goto __Y;
   }
   while (mp_cmp(&x, &y) != MP_LT) {
      ++(q.dp[n - t]);
      if ((res = mp_sub(&x, &y, &x)) != MP_OKAY) {
         goto __Y;
      }
   }

   /* reset y by shifting it back down */
   mp_rshd(&y, n - t);

   /* step 3. for i from n down to (t + 1) */
   for (i = n; i >= (t + 1); i--) {
      if (i > x.alloc) continue;

      /* step 3.1 if xi == yt then set q{i-t-1} to b-1, otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
      if (x.dp[i] == y.dp[t]) {
         q.dp[i - t - 1] = ((1UL<<DIGIT_BIT)-1UL);
      } else {
         mp_word tmp;
         tmp = ((mp_word)x.dp[i]) << ((mp_word)DIGIT_BIT);
         tmp |= ((mp_word)x.dp[i-1]);
         tmp /= ((mp_word)y.dp[t]);
         if (tmp > (mp_word)MP_MASK) tmp = MP_MASK;
         q.dp[i - t - 1] = (mp_digit)(tmp & (mp_word)(MP_MASK));
      }

      /* step 3.2 while (q{i-t-1} * (yt * b + y{t-1})) > xi * b^2 + xi-1 * b + xi-2 do q{i-t-1} -= 1;
       * the estimate is bumped by one first because the loop below starts with a decrement
       */
      q.dp[i-t-1] = (q.dp[i-t-1] + 1) & MP_MASK;
      do {
         q.dp[i-t-1] = (q.dp[i-t-1] - 1) & MP_MASK;

         /* find left hand */
         mp_zero(&t1);
         t1.dp[0] = (t-1 < 0) ? 0 : y.dp[t-1];
         t1.dp[1] = y.dp[t];
         t1.used = 2;
         if ((res = mp_mul_d(&t1, q.dp[i-t-1], &t1)) != MP_OKAY) {
            goto __Y;
         }

         /* find right hand */
         t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i-2];
         t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i-1];
         t2.dp[2] = x.dp[i];
         t2.used = 3;
      } while (mp_cmp(&t1, &t2) == MP_GT);

      /* step 3.3 x = x - q{i-t-1} * y * b^{i-t-1} */
      if ((res = mp_mul_d(&y, q.dp[i-t-1], &t1)) != MP_OKAY) {
         goto __Y;
      }
      if ((res = mp_lshd(&t1, i - t - 1)) != MP_OKAY) {
         goto __Y;
      }
      if ((res = mp_sub(&x, &t1, &x)) != MP_OKAY) {
         goto __Y;
      }

      /* step 3.4 if x < 0 then { x = x + y*b^{i-t-1}; q{i-t-1} -= 1; } */
      if (x.sign == MP_NEG) {
         if ((res = mp_copy(&y, &t1)) != MP_OKAY) {
            goto __Y;
         }
         if ((res = mp_lshd(&t1, i-t-1)) != MP_OKAY) {
            goto __Y;
         }
         if ((res = mp_add(&x, &t1, &x)) != MP_OKAY) {
            goto __Y;
         }
         q.dp[i-t-1] = (q.dp[i-t-1] - 1UL) & MP_MASK;
      }
   }

   /* now q is the quotient and x is the remainder [which we have to normalize] */
   if (d != NULL) {
      /* undo the normalization shift while x is still private, so that a
       * failure here leaves both outputs untouched
       */
      if ((res = mp_div_2d(&x, norm, &x, NULL)) != MP_OKAY) {
         goto __Y;
      }
      mp_clamp(&x);
      /* a->sign is read before c is written because c may be the same
       * object as a; a zero remainder is never given a negative sign
       */
      x.sign = (mp_iszero(&x) == 1) ? MP_ZPOS : a->sign;
   }

   if (c != NULL) {
      mp_clamp(&q);
      mp_exch(&q, c);
      c->sign = neg;
   }

   if (d != NULL) {
      mp_exch(&x, d);
   }

   res = MP_OKAY;

__Y: mp_clear(&y);
__X: mp_clear(&x);
__T2: mp_clear(&t2);
__T1: mp_clear(&t1);
__Q: mp_clear(&q);
   DECFUNC();
   return res;
}
/* c = a mod b, 0 <= c < b
 *
 * Takes the remainder of mp_div(a, b) and, when that remainder is negative,
 * adds b to it.  Returns MP_OKAY or the status of the helper that failed.
 */
int mp_mod(mp_int *a, mp_int *b, mp_int *c)
{
   mp_int t;
   int res;
   REGFUNC("mp_mod");
   VERIFY(a);
   VERIFY(b);
   VERIFY(c);

   if ((res = mp_init(&t)) != MP_OKAY) {
      DECFUNC();
      return res;
   }

   if ((res = mp_div(a, b, NULL, &t)) != MP_OKAY) {
      mp_clear(&t);
      DECFUNC();
      return res;
   }

   /* a zero remainder that carries a negative sign must not take the
    * "add b" branch below: that would yield b instead of 0
    */
   if (mp_iszero(&t) == 1) {
      t.sign = MP_ZPOS;
   }

   if (t.sign == MP_NEG) {
      res = mp_add(b, &t, c);
   } else {
      res = MP_OKAY;
      mp_exch(&t, c);
   }

   mp_clear(&t);
   DECFUNC();
   return res;
}
/* single digit addition: c = a + b
 *
 * The digit is loaded into a temporary mp_int and the sum is left to mp_add().
 */
int mp_add_d(mp_int *a, mp_digit b, mp_int *c)
{
   mp_int addend;
   int res;
   REGFUNC("mp_add_d");
   VERIFY(a);
   VERIFY(c);

   res = mp_init(&addend);
   if (res == MP_OKAY) {
      mp_set(&addend, b);
      res = mp_add(a, &addend, c);
      mp_clear(&addend);
   }

   DECFUNC();
   return res;
}
/* single digit subtraction: c = a - b
 *
 * The digit is loaded into a temporary mp_int and the difference is left to mp_sub().
 */
int mp_sub_d(mp_int *a, mp_digit b, mp_int *c)
{
   mp_int subtrahend;
   int res;
   REGFUNC("mp_sub_d");
   VERIFY(a);
   VERIFY(c);

   res = mp_init(&subtrahend);
   if (res == MP_OKAY) {
      mp_set(&subtrahend, b);
      res = mp_sub(a, &subtrahend, c);
      mp_clear(&subtrahend);
   }

   DECFUNC();
   return res;
}
/* multiply by a digit: c = a * b
 *
 * The product is built digit by digit in a temporary that has two digits
 * more than a, then swapped into c.  The result carries the sign of a.
 */
int mp_mul_d(mp_int *a, mp_digit b, mp_int *c)
{
   mp_int   prod;
   mp_word  acc;
   mp_digit carry;
   int      res, ndigits, ix;
   REGFUNC("mp_mul_d");
   VERIFY(a);
   VERIFY(c);

   ndigits = a->used;
   res = mp_init_size(&prod, ndigits + 2);
   if (res != MP_OKAY) {
      DECFUNC();
      return res;
   }
   prod.used = ndigits + 2;
   prod.sign = a->sign;

   carry = 0;
   ix = 0;
   while (ix < ndigits) {
      /* carry-in + digit * b, held in the double-width word */
      acc = ((mp_word)carry) + ((mp_word)a->dp[ix]) * ((mp_word)b);
      prod.dp[ix] = (mp_digit)(acc & ((mp_word)MP_MASK));
      carry = (mp_digit)(acc >> ((mp_word)DIGIT_BIT));
      ++ix;
   }
   /* the final carry becomes the next digit */
   prod.dp[ix] = carry;

   mp_clamp(&prod);
   mp_exch(&prod, c);
   mp_clear(&prod);
   DECFUNC();
   return MP_OKAY;
}
/* single digit division: c = a / b, *d = lowest digit of the remainder
 *
 * Thin wrapper around mp_div().  c is passed straight through to mp_div();
 * d may be NULL when the remainder is not wanted.  When d is given it is
 * written even if mp_div() reported an error.
 */
int mp_div_d(mp_int *a, mp_digit b, mp_int *c, mp_digit *d)
{
   mp_int divisor, rem;
   int res;
   REGFUNC("mp_div_d");
   VERIFY(a);
   if (c != NULL) { VERIFY(c); }

   res = mp_init(&divisor);
   if (res != MP_OKAY) {
      DECFUNC();
      return res;
   }

   res = mp_init(&rem);
   if (res == MP_OKAY) {
      mp_set(&divisor, b);
      res = mp_div(a, &divisor, c, &rem);
      if (d != NULL) {
         *d = rem.dp[0];
      }
      mp_clear(&rem);
   }

   mp_clear(&divisor);
   DECFUNC();
   return res;
}
/* single digit modulus: *c = a mod b, 0 <= *c < b
 *
 * Divides a by b with mp_div() and, when the remainder is negative, adds b
 * to it.  Returns MP_OKAY on success, otherwise the status of the helper
 * that failed (mp_div() reports MP_VAL for b == 0); *c is written only on
 * success.
 */
int mp_mod_d(mp_int *a, mp_digit b, mp_digit *c)
{
   mp_int t, t2;
   int res;
   REGFUNC("mp_mod_d");
   VERIFY(a);

   if ((res = mp_init(&t)) != MP_OKAY) {
      DECFUNC();
      return res;
   }
   if ((res = mp_init(&t2)) != MP_OKAY) {
      mp_clear(&t);
      DECFUNC();
      return res;
   }

   mp_set(&t, b);
   /* the division can fail (zero divisor, allocation); do not carry on
    * with a remainder that was never computed
    */
   if ((res = mp_div(a, &t, NULL, &t2)) != MP_OKAY) {
      goto CLEANUP;
   }

   /* only a genuinely negative remainder is shifted up by b; a zero that
    * happens to carry a negative sign must stay zero
    */
   if (t2.sign == MP_NEG && mp_iszero(&t2) == 0) {
      if ((res = mp_add_d(&t2, b, &t2)) != MP_OKAY) {
         goto CLEANUP;
      }
   }

   *c = (mp_iszero(&t2) == 1) ? ((mp_digit)0) : t2.dp[0];
   res = MP_OKAY;

CLEANUP:
   mp_clear(&t);
   mp_clear(&t2);
   DECFUNC();
   return res;
}
/* c = a^b for a single digit exponent b
 *
 * Left-to-right binary exponentiation over all DIGIT_BIT bits of b: the
 * running result is squared for every bit and multiplied by a copy of a
 * whenever the bit is set.  Returns MP_OKAY or the status of the helper
 * that failed.
 */
int mp_expt_d(mp_int *a, mp_digit b, mp_int *c)
{
   int res, x;
   mp_int g;
   REGFUNC("mp_expt_d");
   VERIFY(a);
   VERIFY(c);

   /* work on a copy of the base, c is overwritten right below */
   if ((res = mp_init_copy(&g, a)) != MP_OKAY) {
      DECFUNC();
      return res;
   }

   /* set initial result */
   mp_set(c, 1);

   for (x = 0; x < (int)DIGIT_BIT; x++) {
      if ((res = mp_sqr(c, c)) != MP_OKAY) {
         mp_clear(&g);
         DECFUNC();
         return res;
      }

      /* test the top bit; the mask is built in mp_digit width so the shift
       * is not carried out on a plain int
       */
      if ((b & (((mp_digit)1)<<(DIGIT_BIT-1))) != 0) {
         if ((res = mp_mul(c, &g, c)) != MP_OKAY) {
            mp_clear(&g);
            DECFUNC();
            return res;
         }
      }

      /* move the next lower bit into the top position */
      b <<= 1;
   }

   mp_clear(&g);
   DECFUNC();
   return MP_OKAY;
}
/* simple modular functions */
/* d = a + b (mod c)
 *
 * The sum is formed in a temporary and then reduced with mp_mod().
 */
int mp_addmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
{
   mp_int sum;
   int res;
   REGFUNC("mp_addmod");
   VERIFY(a);
   VERIFY(b);
   VERIFY(c);
   VERIFY(d);

   res = mp_init(&sum);
   if (res == MP_OKAY) {
      res = mp_add(a, b, &sum);
      if (res == MP_OKAY) {
         res = mp_mod(&sum, c, d);
      }
      mp_clear(&sum);
   }

   DECFUNC();
   return res;
}
/* d = a - b (mod c)
 *
 * The difference is formed in a temporary and then reduced with mp_mod().
 */
int mp_submod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
{
   mp_int diff;
   int res;
   REGFUNC("mp_submod");
   VERIFY(a);
   VERIFY(b);
   VERIFY(c);
   VERIFY(d);

   res = mp_init(&diff);
   if (res == MP_OKAY) {
      res = mp_sub(a, b, &diff);
      if (res == MP_OKAY) {
         res = mp_mod(&diff, c, d);
      }
      mp_clear(&diff);
   }

   DECFUNC();
   return res;
}
/* d = a * b (mod c)
 *
 * The product is formed in a temporary and then reduced with mp_mod().
 */
int mp_mulmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
{
   mp_int prod;
   int res;
   REGFUNC("mp_mulmod");
   VERIFY(a);
   VERIFY(b);
   VERIFY(c);
   VERIFY(d);

   res = mp_init(&prod);
   if (res == MP_OKAY) {
      res = mp_mul(a, b, &prod);
      if (res == MP_OKAY) {
         res = mp_mod(&prod, c, d);
      }
      mp_clear(&prod);
   }

   DECFUNC();
   return res;
}
/* c = a * a (mod b)
 *
 * The square is formed in a temporary and then reduced with mp_mod().
 */
int mp_sqrmod(mp_int *a, mp_int *b, mp_int *c)
{
   mp_int sq;
   int res;
   REGFUNC("mp_sqrmod");
   VERIFY(a);
   VERIFY(b);
   VERIFY(c);

   res = mp_init(&sq);
   if (res == MP_OKAY) {
      res = mp_sqr(a, &sq);
      if (res == MP_OKAY) {
         res = mp_mod(&sq, b, c);
      }
      mp_clear(&sq);
   }

   DECFUNC();
   return res;
}
/* Greatest Common Divisor using the binary method [Algorithm B, page 338, vol2 of TAOCP]
 *
 * c = gcd(a, b).  Special cases handled up front:
 *   - exactly one input is zero: the other input is copied to c
 *   - both inputs are zero:      c is set to 1
 * Otherwise the result is given the sign of the inputs when both signs
 * agree and is non-negative when they differ.
 *
 * Returns MP_OKAY or the status of the helper that failed.
 */
int mp_gcd(mp_int *a, mp_int *b, mp_int *c)
{
   mp_int u, v, t;
   int k, res, neg;
   REGFUNC("mp_gcd");
   VERIFY(a);
   VERIFY(b);
   VERIFY(c);

   /* if either input is zero the gcd is the other one */
   if (mp_iszero(a) == 1 && mp_iszero(b) == 0) {
      DECFUNC();
      return mp_copy(b, c);
   }
   if (mp_iszero(a) == 0 && mp_iszero(b) == 1) {
      DECFUNC();
      return mp_copy(a, c);
   }
   if (mp_iszero(a) == 1 && mp_iszero(b) == 1) {
      mp_set(c, 1);
      DECFUNC();
      return MP_OKAY;
   }

   /* if both are negative they share (-1) as a common divisor */
   neg = (a->sign == b->sign) ? a->sign : MP_ZPOS;

   if ((res = mp_init_copy(&u, a)) != MP_OKAY) {
      DECFUNC();
      return res;
   }
   if ((res = mp_init_copy(&v, b)) != MP_OKAY) {
      goto __U;
   }

   /* must be positive for the remainder of the algorithm */
   u.sign = v.sign = MP_ZPOS;

   if ((res = mp_init(&t)) != MP_OKAY) {
      goto __V;
   }

   /* B1. Find power of two: strip the factors of 2 common to u and v */
   k = 0;
   while ((u.dp[0] & 1) == 0 && (v.dp[0] & 1) == 0) {
      ++k;
      if ((res = mp_div_2d(&u, 1, &u, NULL)) != MP_OKAY) {
         goto __T;
      }
      if ((res = mp_div_2d(&v, 1, &v, NULL)) != MP_OKAY) {
         goto __T;
      }
   }

   /* B2. Initialize: t = -v if u is odd, otherwise t = u */
   if ((u.dp[0] & 1) == 1) {
      if ((res = mp_copy(&v, &t)) != MP_OKAY) {
         goto __T;
      }
      t.sign = MP_NEG;
   } else {
      if ((res = mp_copy(&u, &t)) != MP_OKAY) {
         goto __T;
      }
   }

   do {
      /* B3 (and B4). Halve t, if even */
      while (t.used != 0 && (t.dp[0] & 1) == 0) {
         if ((res = mp_div_2d(&t, 1, &t, NULL)) != MP_OKAY) {
            goto __T;
         }
      }

      /* B5. if t>0 then u=t otherwise v=-t */
      if (t.used != 0 && t.sign != MP_NEG) {
         if ((res = mp_copy(&t, &u)) != MP_OKAY) {
            goto __T;
         }
      } else {
         if ((res = mp_copy(&t, &v)) != MP_OKAY) {
            goto __T;
         }
         v.sign = (v.sign == MP_ZPOS) ? MP_NEG : MP_ZPOS;
      }

      /* B6. t = u - v, if t != 0 loop otherwise terminate */
      if ((res = mp_sub(&u, &v, &t)) != MP_OKAY) {
         goto __T;
      }
   } while (t.used != 0);

   /* put the common power of two back */
   if ((res = mp_mul_2d(&u, k, &u)) != MP_OKAY) {
      goto __T;
   }

   mp_exch(&u, c);
   c->sign = neg;
   res = MP_OKAY;

   /* unwind in reverse order of initialisation: __U is the entry point used
    * when only u exists, so it must release u and nothing else
    */
__T: mp_clear(&t);
__V: mp_clear(&v);
__U: mp_clear(&u);
   DECFUNC();
   return res;
}
/* computes the least common multiple as a*b / gcd(a, b)
 *
 * The product is kept in a temporary, the gcd is written to c, and c is
 * then replaced by the quotient of the two.
 */
int mp_lcm(mp_int *a, mp_int *b, mp_int *c)
{
   mp_int prod;
   int res;
   REGFUNC("mp_lcm");
   VERIFY(a);
   VERIFY(b);
   VERIFY(c);

   res = mp_init(&prod);
   if (res != MP_OKAY) {
      DECFUNC();
      return res;
   }

   /* each step runs only when everything before it succeeded */
   res = mp_mul(a, b, &prod);
   if (res == MP_OKAY) {
      res = mp_gcd(a, b, c);
   }
   if (res == MP_OKAY) {
      res = mp_div(&prod, c, c, NULL);
   }

   mp_clear(&prod);
   DECFUNC();
   return res;
}
/* computes the modular inverse via binary extended euclidean algorithm, that is c = 1/a mod b
 *
 * Reduced variant of the routine in mp_invmod(): only the two cofactors B
 * and D are tracked.  mp_invmod() calls it when the modulus b is odd.
 *
 * Returns MP_VAL when a and b are both even or when no inverse exists,
 * MP_OKAY on success, otherwise the status of the helper that failed.
 * On success c is given the sign of a.
 */
static int fast_mp_invmod(mp_int *a, mp_int *b, mp_int *c)
{
   mp_int x, y, u, v, B, D;
   int res, neg;
   REGFUNC("fast_mp_invmod");
   VERIFY(a);
   VERIFY(b);
   VERIFY(c);

   res = mp_init(&x);
   if (res != MP_OKAY) {
      goto DONE;
   }
   res = mp_init(&y);
   if (res != MP_OKAY) {
      goto FREE_X;
   }
   res = mp_init(&u);
   if (res != MP_OKAY) {
      goto FREE_Y;
   }
   res = mp_init(&v);
   if (res != MP_OKAY) {
      goto FREE_U;
   }
   res = mp_init(&B);
   if (res != MP_OKAY) {
      goto FREE_V;
   }
   res = mp_init(&D);
   if (res != MP_OKAY) {
      goto FREE_B;
   }

   /* x = modulus, y = |value to invert| */
   res = mp_copy(b, &x);
   if (res != MP_OKAY) {
      goto FREE_D;
   }
   res = mp_copy(a, &y);
   if (res != MP_OKAY) {
      goto FREE_D;
   }
   res = mp_abs(&y, &y);
   if (res != MP_OKAY) {
      goto FREE_D;
   }

   /* both even: report an error */
   if (mp_iseven(&x) == 1 && mp_iseven(&y) == 1) {
      res = MP_VAL;
      goto FREE_D;
   }

   /* u = x, v = y, D = 1 (B keeps the value mp_init gave it) */
   res = mp_copy(&x, &u);
   if (res != MP_OKAY) {
      goto FREE_D;
   }
   res = mp_copy(&y, &v);
   if (res != MP_OKAY) {
      goto FREE_D;
   }
   mp_set(&D, 1);

   do {
      /* while u is even: halve u and B, making B even first by subtracting x */
      while (mp_iseven(&u) == 1) {
         res = mp_div_2(&u, &u);
         if (res != MP_OKAY) {
            goto FREE_D;
         }
         if (mp_iseven(&B) == 0) {
            res = mp_sub(&B, &x, &B);
            if (res != MP_OKAY) {
               goto FREE_D;
            }
         }
         res = mp_div_2(&B, &B);
         if (res != MP_OKAY) {
            goto FREE_D;
         }
      }

      /* while v is even: halve v and D, making D even first by subtracting x */
      while (mp_iseven(&v) == 1) {
         res = mp_div_2(&v, &v);
         if (res != MP_OKAY) {
            goto FREE_D;
         }
         if (mp_iseven(&D) == 0) {
            res = mp_sub(&D, &x, &D);
            if (res != MP_OKAY) {
               goto FREE_D;
            }
         }
         res = mp_div_2(&D, &D);
         if (res != MP_OKAY) {
            goto FREE_D;
         }
      }

      if (mp_cmp(&u, &v) == MP_LT) {
         /* u < v:  v = v - u, D = D - B */
         res = mp_sub(&v, &u, &v);
         if (res != MP_OKAY) {
            goto FREE_D;
         }
         res = mp_sub(&D, &B, &D);
         if (res != MP_OKAY) {
            goto FREE_D;
         }
      } else {
         /* u >= v:  u = u - v, B = B - D */
         res = mp_sub(&u, &v, &u);
         if (res != MP_OKAY) {
            goto FREE_D;
         }
         res = mp_sub(&B, &D, &B);
         if (res != MP_OKAY) {
            goto FREE_D;
         }
      }
      /* go round again until u has been reduced to zero */
   } while (mp_iszero(&u) == 0);

   /* v now holds the gcd; without gcd == 1 there is no inverse */
   if (mp_cmp_d(&v, 1) != MP_EQ) {
      res = MP_VAL;
      goto FREE_D;
   }

   /* D is the inverse; lift it until it is no longer negative */
   neg = a->sign;
   while (D.sign == MP_NEG) {
      res = mp_add(&D, b, &D);
      if (res != MP_OKAY) {
         goto FREE_D;
      }
   }
   mp_exch(&D, c);
   c->sign = neg;
   res = MP_OKAY;

FREE_D:
   mp_clear(&D);
FREE_B:
   mp_clear(&B);
FREE_V:
   mp_clear(&v);
FREE_U:
   mp_clear(&u);
FREE_Y:
   mp_clear(&y);
FREE_X:
   mp_clear(&x);
DONE:
   DECFUNC();
   return res;
}
/* computes the modular inverse c = 1/a mod b
 *
 * Binary extended euclidean algorithm that maintains
 *    A*x + B*y = u   and   C*x + D*y = v      (x = |a|, y = b)
 * until u reaches zero; v is then the gcd and C the inverse of x.
 * An odd modulus is handed to fast_mp_invmod() instead.
 *
 * Returns MP_VAL when b is negative or zero, when a and b are both even, or
 * when no inverse exists; MP_OKAY on success; otherwise the status of the
 * helper that failed.  On this (even modulus) path the result is
 * non-negative.
 */
int mp_invmod(mp_int *a, mp_int *b, mp_int *c)
{
   mp_int x, y, u, v, A, B, C, D;
   int res;
   REGFUNC("mp_invmod");
   VERIFY(a);
   VERIFY(b);
   VERIFY(c);

   /* b cannot be negative, and a zero modulus would keep the halving loop
    * on v below from ever terminating
    */
   if (b->sign == MP_NEG || mp_iszero(b) == 1) {
      DECFUNC();
      return MP_VAL;
   }

   /* if the modulus is odd we can use a faster routine instead */
   if (mp_iseven(b) == 0) {
      res = fast_mp_invmod(a,b,c);
      DECFUNC();
      return res;
   }

   if ((res = mp_init(&x)) != MP_OKAY) {
      goto __ERR;
   }
   if ((res = mp_init(&y)) != MP_OKAY) {
      goto __X;
   }
   if ((res = mp_init(&u)) != MP_OKAY) {
      goto __Y;
   }
   if ((res = mp_init(&v)) != MP_OKAY) {
      goto __U;
   }
   if ((res = mp_init(&A)) != MP_OKAY) {
      goto __V;
   }
   if ((res = mp_init(&B)) != MP_OKAY) {
      goto __A;
   }
   if ((res = mp_init(&C)) != MP_OKAY) {
      goto __B;
   }
   if ((res = mp_init(&D)) != MP_OKAY) {
      goto __C;
   }

   /* x = |a|, y = b */
   if ((res = mp_copy(a, &x)) != MP_OKAY) {
      goto __D;
   }
   if ((res = mp_copy(b, &y)) != MP_OKAY) {
      goto __D;
   }
   if ((res = mp_abs(&x, &x)) != MP_OKAY) {
      goto __D;
   }

   /* 2. [modified] if x,y are both even then return an error! */
   if (mp_iseven(&x) == 1 && mp_iseven(&y) == 1) {
      res = MP_VAL;
      goto __D;
   }

   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
   if ((res = mp_copy(&x, &u)) != MP_OKAY) {
      goto __D;
   }
   if ((res = mp_copy(&y, &v)) != MP_OKAY) {
      goto __D;
   }
   mp_set(&A, 1);
   mp_set(&D, 1);

top:
   /* 4. while u is even do */
   while (mp_iseven(&u) == 1) {
      /* 4.1 u = u/2 */
      if ((res = mp_div_2(&u, &u)) != MP_OKAY) {
         goto __D;
      }
      /* 4.2 if A or B is odd then */
      if (mp_iseven(&A) == 0 || mp_iseven(&B) == 0) {
         /* A = (A+y)/2, B = (B-x)/2 */
         if ((res = mp_add(&A, &y, &A)) != MP_OKAY) {
            goto __D;
         }
         if ((res = mp_sub(&B, &x, &B)) != MP_OKAY) {
            goto __D;
         }
      }
      /* A = A/2, B = B/2 */
      if ((res = mp_div_2(&A, &A)) != MP_OKAY) {
         goto __D;
      }
      if ((res = mp_div_2(&B, &B)) != MP_OKAY) {
         goto __D;
      }
   }

   /* 5. while v is even do */
   while (mp_iseven(&v) == 1) {
      /* 5.1 v = v/2 */
      if ((res = mp_div_2(&v, &v)) != MP_OKAY) {
         goto __D;
      }
      /* 5.2 if C or D is odd then */
      if (mp_iseven(&C) == 0 || mp_iseven(&D) == 0) {
         /* C = (C+y)/2, D = (D-x)/2 */
         if ((res = mp_add(&C, &y, &C)) != MP_OKAY) {
            goto __D;
         }
         if ((res = mp_sub(&D, &x, &D)) != MP_OKAY) {
            goto __D;
         }
      }
      /* C = C/2, D = D/2 */
      if ((res = mp_div_2(&C, &C)) != MP_OKAY) {
         goto __D;
      }
      if ((res = mp_div_2(&D, &D)) != MP_OKAY) {
         goto __D;
      }
   }

   /* 6. if u >= v then */
   if (mp_cmp(&u, &v) != MP_LT) {
      /* u = u - v, A = A - C, B = B - D */
      if ((res = mp_sub(&u, &v, &u)) != MP_OKAY) {
         goto __D;
      }
      if ((res = mp_sub(&A, &C, &A)) != MP_OKAY) {
         goto __D;
      }
      if ((res = mp_sub(&B, &D, &B)) != MP_OKAY) {
         goto __D;
      }
   } else {
      /* v = v - u, C = C - A, D = D - B */
      if ((res = mp_sub(&v, &u, &v)) != MP_OKAY) {
         goto __D;
      }
      if ((res = mp_sub(&C, &A, &C)) != MP_OKAY) {
         goto __D;
      }
      if ((res = mp_sub(&D, &B, &D)) != MP_OKAY) {
         goto __D;
      }
   }

   /* if not zero goto step 4 */
   if (mp_iszero(&u) == 0) goto top;

   /* now a = C, b = D, gcd == g*v */

   /* if v != 1 then there is no inverse */
   if (mp_cmp_d(&v, 1) != MP_EQ) {
      res = MP_VAL;
      goto __D;
   }

   /* C is the inverse but may have come out negative; lift it by the
    * modulus until it is not, as fast_mp_invmod() does for its result
    */
   while (C.sign == MP_NEG) {
      if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
         goto __D;
      }
   }

   mp_exch(&C, c);
   res = MP_OKAY;

__D: mp_clear(&D);
__C: mp_clear(&C);
__B: mp_clear(&B);
__A: mp_clear(&A);
__V: mp_clear(&v);
__U: mp_clear(&u);
__Y: mp_clear(&y);
__X: mp_clear(&x);
__ERR:
   DECFUNC();
   return res;
}
/* pre-calculate the value required for Barrett reduction
 *
 * For the modulus b this stores in a the quotient of base^(2 * b->used)
 * divided by b, where base is one digit (a is set to 1, shifted left by
 * 2 * b->used digits and divided by b).
 */
int mp_reduce_setup(mp_int *a, mp_int *b)
{
   int res;
   REGFUNC("mp_reduce_setup");
   VERIFY(a);
   VERIFY(b);

   mp_set(a, 1);
   res = mp_lshd(a, b->used * 2);
   if (res == MP_OKAY) {
      res = mp_div(a, b, a, NULL);
   }

   DECFUNC();
   return res;
}
/* reduces x mod m in place, assumes 0 < x < m^2, mu is precomputed via mp_reduce_setup
 * From HAC pp.604 Algorithm 14.42
 *
 * k below is the digit count of m.  Returns MP_OKAY or the status of the
 * helper that failed.
 */
int mp_reduce(mp_int *x, mp_int *m, mp_int *mu)
{
   mp_int quot;
   int res, k = m->used;
   REGFUNC("mp_reduce");
   VERIFY(x);
   VERIFY(m);
   VERIFY(mu);

   res = mp_init_copy(&quot, x);
   if (res != MP_OKAY) {
      DECFUNC();
      return res;
   }

   /* q1 = x / b^(k-1) */
   mp_rshd(&quot, k - 1);

   /* q2 = q1 * mu; according to HAC only the high digits are needed,
    * unless m is very large, in which case the full product is taken
    */
   if (((unsigned long)m->used) > (1UL<<(unsigned long)(DIGIT_BIT-1UL))) {
      res = mp_mul(&quot, mu, &quot);
   } else {
      res = s_mp_mul_high_digs(&quot, mu, &quot, k-1);
   }
   if (res != MP_OKAY) {
      goto DONE;
   }

   /* q3 = q2 / b^(k+1) */
   mp_rshd(&quot, k + 1);

   /* x = x mod b^(k+1), quick (no division) */
   res = mp_mod_2d(x, DIGIT_BIT * (k + 1), x);
   if (res != MP_OKAY) {
      goto DONE;
   }

   /* q = q * m mod b^(k+1), quick (no division) */
   res = s_mp_mul_digs(&quot, m, &quot, k + 1);
   if (res != MP_OKAY) {
      goto DONE;
   }

   /* x = x - q */
   res = mp_sub(x, &quot, x);
   if (res != MP_OKAY) {
      goto DONE;
   }

   /* if x went negative, add b^(k+1) to it */
   if (mp_cmp_d(x, 0) == MP_LT) {
      mp_set(&quot, 1);
      res = mp_lshd(&quot, k + 1);
      if (res != MP_OKAY) {
         goto DONE;
      }
      res = mp_add(x, &quot, x);
      if (res != MP_OKAY) {
         goto DONE;
      }
   }

   /* back off while x is still not below m */
   while (mp_cmp(x, m) != MP_LT) {
      res = s_mp_sub(x, m, x);
      if (res != MP_OKAY) {
         goto DONE;
      }
   }

DONE:
   mp_clear(&quot);
   DECFUNC();
   return res;
}
/* computes Y == G^X mod P, HAC pp.616, Algorithm 14.85
 *
 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
 * The value of k changes based on the size of the exponent.
 *
 * Intermediate results are reduced with Barrett reduction (mp_reduce_setup /
 * mp_reduce).  Returns MP_OKAY or the status of the helper that failed; all
 * temporaries are released on every path.
 */
int mp_exptmod(mp_int *G, mp_int *X, mp_int *P, mp_int *Y)
{
   mp_int M[64], res, mu;
   mp_digit buf;
   int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
   REGFUNC("mp_exptmod");
   VERIFY(G);
   VERIFY(X);
   VERIFY(P);
   VERIFY(Y);

   /* find window size (at most 6, so 1<<winsize never exceeds the 64 table slots) */
   x = mp_count_bits(X);
   if (x <= 18) { winsize = 2; }
   else if (x <= 84) { winsize = 3; }
   else if (x <= 300) { winsize = 4; }
   else if (x <= 930) { winsize = 5; }
   else { winsize = 6; }

   /* init G array */
   for (x = 0; x < (1<<winsize); x++) {
      if ((err = mp_init_size(&M[x], 1)) != MP_OKAY) {
         /* release only the entries that were set up so far */
         for (y = 0; y < x; y++) {
            mp_clear(&M[y]);
         }
         DECFUNC();
         return err;
      }
   }

   /* create mu, used for Barrett reduction */
   if ((err = mp_init(&mu)) != MP_OKAY) {
      goto __M;
   }
   if ((err = mp_reduce_setup(&mu, P)) != MP_OKAY) {
      goto __MU;
   }

   /* create M table
    *
    * The M table contains powers of the input base, e.g. M[x] = G^x mod P
    *
    * This table is not made in the straight forward manner of a for loop with only
    * multiplications.  Since squaring is faster than multiplication we use as many
    * squarings as possible.  As a result about half of the steps to make the M
    * table are squarings.
    *
    * The first half of the table is not computed though, except for M[0] and M[1]
    */
   mp_set(&M[0], 1);
   if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
      goto __MU;
   }

   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
   if ((err = mp_copy(&M[1], &M[1<<(winsize-1)])) != MP_OKAY) {
      goto __MU;
   }
   for (x = 0; x < (winsize-1); x++) {
      if ((err = mp_sqr(&M[1<<(winsize-1)], &M[1<<(winsize-1)])) != MP_OKAY) {
         goto __MU;
      }
      if ((err = mp_reduce(&M[1<<(winsize-1)], P, &mu)) != MP_OKAY) {
         goto __MU;
      }
   }

   /* create upper table */
   for (x = (1<<(winsize-1))+1; x < (1 << winsize); x++) {
      if ((err = mp_mul(&M[x-1], &M[1], &M[x])) != MP_OKAY) {
         goto __MU;
      }
      if ((err = mp_reduce(&M[x], P, &mu)) != MP_OKAY) {
         goto __MU;
      }
   }

   /* init result; from here on every failure has to leave through __RES */
   if ((err = mp_init(&res)) != MP_OKAY) {
      goto __MU;
   }
   mp_set(&res, 1);

   /* set initial mode and bit cnt
    *   mode 0: still skipping leading zero bits
    *   mode 1: between windows
    *   mode 2: in the middle of filling a window
    */
   mode = 0;
   buf = 0;
   digidx = X->used - 1;
   bitcpy = bitbuf = 0;
   bitcnt = 1;

   for (;;) {
      /* grab next digit as required */
      if (--bitcnt == 0) {
         if (digidx == -1) {
            break;
         }
         buf = X->dp[digidx--];
         bitcnt = (int)DIGIT_BIT;
      }

      /* grab the next msb from the exponent */
      y = (buf >> (DIGIT_BIT - 1)) & 1;
      buf <<= 1;

      /* if the bit is zero and mode == 0 then we ignore it
       * These represent the leading zero bits before the first 1 bit
       * in the exponent.  Technically this opt is not required but it
       * does lower the # of trivial squaring/reductions used
       */
      if (y == 0 && mode == 0) continue;

      /* if the bit is zero and mode == 1 then we square */
      if (y == 0 && mode == 1) {
         if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
            goto __RES;
         }
         if ((err = mp_reduce(&res, P, &mu)) != MP_OKAY) {
            goto __RES;
         }
         continue;
      }

      /* else we add it to the window */
      bitbuf |= (y<<(winsize-++bitcpy));
      mode = 2;

      if (bitcpy == winsize) {
         /* ok window is filled so square as required and multiply */
         /* square first */
         for (x = 0; x < winsize; x++) {
            if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
               goto __RES;
            }
            if ((err = mp_reduce(&res, P, &mu)) != MP_OKAY) {
               goto __RES;
            }
         }

         /* then multiply */
         if ((err = mp_mul(&res, &M[bitbuf], &res)) != MP_OKAY) {
            goto __RES;
         }
         if ((err = mp_reduce(&res, P, &mu)) != MP_OKAY) {
            goto __RES;
         }

         /* empty window and reset */
         bitcpy = bitbuf = 0;
         mode = 1;
      }
   }

   /* if bits remain then square/multiply */
   if (mode == 2 && bitcpy > 0) {
      /* square then multiply if the bit is set */
      for (x = 0; x < bitcpy; x++) {
         if ((err = mp_sqr(&res, &res)) != MP_OKAY) {
            goto __RES;
         }
         if ((err = mp_reduce(&res, P, &mu)) != MP_OKAY) {
            goto __RES;
         }

         bitbuf <<= 1;
         if ((bitbuf & (1<<winsize)) != 0) {
            /* then multiply */
            if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY) {
               goto __RES;
            }
            if ((err = mp_reduce(&res, P, &mu)) != MP_OKAY) {
               goto __RES;
            }
         }
      }
   }

   mp_exch(&res, Y);
   err = MP_OKAY;

__RES: mp_clear(&res);
__MU : mp_clear(&mu);
__M :
   for (x = 0; x < (1<<winsize); x++) {
      mp_clear(&M[x]);
   }
   DECFUNC();
   return err;
}
/* find the n'th root of an integer
 *
 * Result found such that (c)^b <= a and (c+1)^b > a
 *
 * Iterates t2 = t1 - ((t1^b - a) / (b * t1^(b-1))) until two successive
 * values agree, then corrects a possible off-by-one.  The sign of a is
 * cleared while the iteration runs and is put back before returning, on
 * success and on failure alike; the result takes the sign of a.
 *
 * Returns MP_VAL when b is zero or when b is even and a is negative,
 * MP_OKAY on success, otherwise the status of the helper that failed.
 */
int mp_n_root(mp_int *a, mp_digit b, mp_int *c)
{
   mp_int t1, t2, t3;
   int res, neg;

   /* a zero'th root is not defined (and b-1 below would wrap around) */
   if (b == 0) {
      return MP_VAL;
   }

   /* input must be positive if b is even */
   if ((b&1) == 0 && a->sign == MP_NEG) {
      return MP_VAL;
   }

   if ((res = mp_init(&t1)) != MP_OKAY) {
      return res;
   }
   if ((res = mp_init(&t2)) != MP_OKAY) {
      goto __T1;
   }
   if ((res = mp_init(&t3)) != MP_OKAY) {
      goto __T2;
   }

   /* if a is negative fudge the sign but keep track */
   neg = a->sign;
   a->sign = MP_ZPOS;

   /* t2 = a */
   if ((res = mp_copy(a, &t2)) != MP_OKAY) {
      goto __T3;
   }

   do {
      /* t1 = t2 */
      if ((res = mp_copy(&t2, &t1)) != MP_OKAY) {
         goto __T3;
      }

      /* t2 = t1 - ((t1^b - a) / (b * t1^(b-1))) */
      if ((res = mp_expt_d(&t1, b-1, &t3)) != MP_OKAY) {   /* t3 = t1^(b-1) */
         goto __T3;
      }

      /* numerator */
      if ((res = mp_mul(&t3, &t1, &t2)) != MP_OKAY) {      /* t2 = t1^b */
         goto __T3;
      }
      if ((res = mp_sub(&t2, a, &t2)) != MP_OKAY) {        /* t2 = t1^b - a */
         goto __T3;
      }

      /* denominator */
      if ((res = mp_mul_d(&t3, b, &t3)) != MP_OKAY) {      /* t3 = t1^(b-1) * b */
         goto __T3;
      }

      if ((res = mp_div(&t2, &t3, &t3, NULL)) != MP_OKAY) { /* t3 = (t1^b - a)/(b * t1^(b-1)) */
         goto __T3;
      }
      if ((res = mp_sub(&t1, &t3, &t2)) != MP_OKAY) {
         goto __T3;
      }
   } while (mp_cmp(&t1, &t2) != MP_EQ);

   /* result can be at most off by one so check */
   if ((res = mp_expt_d(&t1, b, &t2)) != MP_OKAY) {
      goto __T3;
   }
   if (mp_cmp(&t2, a) == MP_GT) {
      if ((res = mp_sub_d(&t1, 1, &t1)) != MP_OKAY) {
         goto __T3;
      }
   }

   /* set the result */
   mp_exch(&t1, c);

   /* set the sign of the result */
   c->sign = neg;

   res = MP_OKAY;

__T3:
   /* every path that reaches this label has cleared the sign of a, so it
    * is restored here rather than on the success path only
    */
   a->sign = neg;
   mp_clear(&t3);
__T2: mp_clear(&t2);
__T1: mp_clear(&t1);
   return res;
}
/* --> radix conversion <-- */
/* reverse the first len bytes of s in place, used for radix code */
static void reverse(unsigned char *s, int len)
{
   int lo, hi;
   unsigned char tmp;

   /* swap the outermost pair and walk both indices towards the middle */
   for (lo = 0, hi = len - 1; lo < hi; lo++, hi--) {
      tmp   = s[lo];
      s[lo] = s[hi];
      s[hi] = tmp;
   }
}
/* returns the number of bits in an int (0 when no digits are in use) */
int mp_count_bits(mp_int *a)
{
   int bits;
   mp_digit top;

   if (a->used == 0) {
      return 0;
   }

   /* every digit below the top one counts in full ... */
   bits = (a->used - 1) * DIGIT_BIT;

   /* ... and the top digit contributes up to its highest set bit */
   for (top = a->dp[a->used - 1]; top; top >>= ((mp_digit)1)) {
      ++bits;
   }
   return bits;
}
/* reads c bytes from the unsigned char array b into a, assumes the msb is
 * stored first [big endian]
 *
 * For every byte a is shifted left by eight bits and the byte is merged
 * into the low digit(s).  Returns MP_OKAY or the status of a failing shift.
 */
int mp_read_unsigned_bin(mp_int *a, unsigned char *b, int c)
{
   int res;

   mp_zero(a);
   for (; c > 0; --c, ++b) {
      res = mp_mul_2d(a, 8, a);
      if (res != MP_OKAY) {
         return res;
      }

      if (DIGIT_BIT == 7) {
         /* a byte does not fit into one 7-bit digit: spread it over two */
         a->dp[0] = (*b & MP_MASK);
         a->dp[1] |= ((*b >> 7U) & 1);
         a->used += 2;
      } else {
         a->dp[0] |= *b;
         a->used += 1;
      }
   }
   mp_clamp(a);
   return MP_OKAY;
}
/* read signed bin, big endian: b[0] is the sign byte (0 == positive,
 * anything else == negative), the remaining c - 1 bytes are the magnitude
 */
int mp_read_signed_bin(mp_int *a, unsigned char *b, int c)
{
   int res;

   res = mp_read_unsigned_bin(a, b + 1, c - 1);
   if (res == MP_OKAY) {
      a->sign = ((b[0] != (unsigned char)0) ? MP_NEG : MP_ZPOS);
   }
   return res;
}
/* store in unsigned [big endian] format
 *
 * Bytes are peeled off a working copy of a, least significant first, and
 * the buffer is reversed at the end.  Nothing is written for a zero value.
 */
int mp_to_unsigned_bin(mp_int *a, unsigned char *b)
{
   mp_int work;
   int res, nbytes;

   res = mp_init_copy(&work, a);
   if (res != MP_OKAY) {
      return res;
   }

   for (nbytes = 0; mp_iszero(&work) == 0; ++nbytes) {
      if (DIGIT_BIT == 7) {
         /* 7-bit digits: borrow the eighth bit from the next digit */
         b[nbytes] = (unsigned char)(work.dp[0] | ((work.dp[1] & 0x01) << 7));
      } else {
         b[nbytes] = (unsigned char)(work.dp[0] & 255);
      }

      res = mp_div_2d(&work, 8, &work, NULL);
      if (res != MP_OKAY) {
         mp_clear(&work);
         return res;
      }
   }

   reverse(b, nbytes);
   mp_clear(&work);
   return MP_OKAY;
}
/* store in signed [big endian] format: the magnitude goes to b[1..], the
 * sign byte (0 == positive, 1 == negative) to b[0]
 */
int mp_to_signed_bin(mp_int *a, unsigned char *b)
{
   int res;

   res = mp_to_unsigned_bin(a, b + 1);
   if (res == MP_OKAY) {
      b[0] = (unsigned char)((a->sign != MP_ZPOS) ? 1 : 0);
   }
   return res;
}
/* get the size for an unsigned equivalent: bit count rounded up to whole bytes */
int mp_unsigned_bin_size(mp_int *a)
{
   int bits = mp_count_bits(a);
   int bytes = bits / 8;

   if ((bits & 7) != 0) {
      ++bytes;
   }
   return bytes;
}
/* get the size for a signed equivalent: one sign byte plus the magnitude bytes */
int mp_signed_bin_size(mp_int *a)
{
   int bits = mp_count_bits(a);
   int bytes = bits / 8;

   if ((bits & 7) != 0) {
      ++bytes;
   }
   return 1 + bytes;
}
/* read a string [ASCII] in a given radix (2..64)
 *
 * An optional leading '-' marks a negative value.  Digits are looked up in
 * s_rmap; parsing stops silently at the first character that is not a digit
 * of the given radix.  For radices up to 36 letters are accepted in either
 * case.
 *
 * Returns MP_VAL for a radix outside 2..64, MP_OKAY on success, otherwise
 * the status of the helper that failed.
 */
int mp_read_radix(mp_int *a, char *str, int radix)
{
   int y, res, neg;
   char ch;

   if (radix < 2 || radix > 64) {
      return MP_VAL;
   }

   if (*str == '-') {
      ++str;
      neg = MP_NEG;
   } else {
      neg = MP_ZPOS;
   }

   mp_zero(a);
   while (*str) {
      /* radices up to and including 36 only use 0-9 and A-Z, so lower case
       * input is folded to upper case there.  toupper() needs a value in
       * the range of unsigned char, hence the cast.
       */
      ch = (char)((radix <= 36) ? toupper((unsigned char)*str) : *str);

      /* position in s_rmap is the digit value; 64 means "not found" */
      for (y = 0; y < 64; y++) {
         if (ch == s_rmap[y]) {
            break;
         }
      }

      /* not a digit of this radix: stop parsing */
      if (y >= radix) {
         break;
      }

      if ((res = mp_mul_d(a, (mp_digit)radix, a)) != MP_OKAY) {
         return res;
      }
      if ((res = mp_add_d(a, (mp_digit)y, a)) != MP_OKAY) {
         return res;
      }
      ++str;
   }

   /* "-0" (or a lone "-") must not produce a negative zero */
   a->sign = (mp_iszero(a) == 1) ? MP_ZPOS : neg;
   return MP_OKAY;
}
/* stores a bignum as a ASCII string in a given radix (2..64)
 *
 * str must have room for mp_radix_size(a, radix) bytes.  Negative values
 * get a leading '-'; zero is written as "0".
 *
 * Returns MP_VAL for a radix outside 2..64, MP_OKAY on success, otherwise
 * the status of the helper that failed.
 */
int mp_toradix(mp_int *a, char *str, int radix)
{
   int res, digs;
   mp_int t;
   mp_digit d;
   char *digits_start = str;

   if (radix < 2 || radix > 64) {
      return MP_VAL;
   }

   /* quick out for zero: the digit loop below would emit nothing at all */
   if (mp_iszero(a) == 1) {
      *str++ = '0';
      *str = '\0';
      return MP_OKAY;
   }

   if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
      return res;
   }

   if (t.sign == MP_NEG) {
      /* the sign stays in front; only the digits after it get reversed */
      ++digits_start;
      *str++ = '-';
      t.sign = MP_ZPOS;
   }

   /* digits come out least significant first */
   digs = 0;
   while (mp_iszero(&t) == 0) {
      if ((res = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) {
         mp_clear(&t);
         return res;
      }
      *str++ = s_rmap[d];
      ++digs;
   }
   reverse((unsigned char *)digits_start, digs);
   *str++ = '\0';
   mp_clear(&t);
   return MP_OKAY;
}
/* returns size of ASCII representation
 *
 * This is the number of bytes mp_toradix() writes for the same arguments,
 * including the sign (if any) and the terminating NUL.  Returns 0 when the
 * radix is outside 2..64 or when a helper fails.
 */
int mp_radix_size(mp_int *a, int radix)
{
   int res, digs;
   mp_int t;

   digs = 0;
   if (radix < 2 || radix > 64) {
      return 0;
   }

   /* zero is rendered as "0": one digit plus the NUL */
   if (mp_iszero(a) == 1) {
      return 2;
   }

   if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
      return 0;
   }

   /* room for the '-' */
   if (t.sign == MP_NEG) {
      ++digs;
      t.sign = MP_ZPOS;
   }

   /* one byte per radix digit; the remainder itself is not needed */
   while (mp_iszero(&t) == 0) {
      if ((res = mp_div_d(&t, (mp_digit)radix, &t, NULL)) != MP_OKAY) {
         mp_clear(&t);
         return 0;
      }
      ++digs;
   }
   mp_clear(&t);

   /* plus the terminating NUL */
   return digs + 1;
}