From e68439aae10d003250afa6c1f57025bfee5f82ed Mon Sep 17 00:00:00 2001
From: lomereiter <lomereiter@gmail.com>
Date: Mon, 23 May 2011 19:44:05 +0400
Subject: [PATCH] balancing multiplication like that in Ruby 1.9

---
 bn_mp_balance_mul.c | 102 ++++++++++++++++++++++++++++++++++++++++++++
 bn_mp_mul.c         |  28 ++++++++++--
 makefile            |   3 +-
 tommath.h           |   1 +
 tommath_class.h     |   2 +
 5 files changed, 131 insertions(+), 5 deletions(-)
 create mode 100644 bn_mp_balance_mul.c

diff --git a/bn_mp_balance_mul.c b/bn_mp_balance_mul.c
new file mode 100644
index 0000000..8f1daab
--- /dev/null
+++ b/bn_mp_balance_mul.c
@@ -0,0 +1,102 @@
+#include <tommath.h>
+#ifdef BN_MP_BALANCE_MUL_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
+ */
+
+/* c = |a| * |b| using balancing multiplication.
+ * If |a| is much less than |b|, 
+ * we firstly split b into chunks such that length of each one is 
+ * roughly equal to that of |a|.
+ */
+int mp_balance_mul (mp_int * a, mp_int * b, mp_int * c)
+{
+  /* the following algorithm is taken from 
+   * Ruby core; namely, function 'bigmul1_balance'
+   * from 'bignum.c'
+   */
+  mp_int t1, t2, tmp;
+  long i, an, bn, r, n;
+  int res, olduse,  min, max;
+  int err = MP_MEM;
+
+  mp_digit *bds, *cds, *t1ds;
+
+  an = a->used;
+  bn = b->used;
+  if ((res = mp_grow(c, an + bn)) != MP_OKAY) {
+    return res;
+  }
+
+  if (mp_init_size(&t1, an) != MP_OKAY) {
+    goto ERR;
+  }
+
+  bds = b->dp;
+  cds = c->dp;
+  t1ds = t1.dp;
+
+  n = 0;
+
+  mp_int x;
+
+  c->used = an + bn;
+  while (bn > 0) {
+    r = MIN(an, bn);
+    for (i = 0; i < r; ++i)
+      t1ds[i] = bds[n + i];
+    t1.used = r;
+
+    mp_init_size(&t2, an + r);
+    mp_mul(a, &t1, &t2);
+
+    if (t2.used > c->used - n) {
+      min = c->used - n; max = t2.used;
+      x.used = t2.used; x.dp = t2.dp;
+    } else {
+      min = t2.used; max = c->used - n;
+      x.used = c->used - n; x.dp = c->dp + n;
+    }
+
+    register mp_digit u, *tmpx, *tmpt2, *tmpcn;
+    register int i;
+    tmpx = tmpcn = x.dp; tmpt2 = t2.dp;
+    u = 0;
+    for (i = 0; i < min; i++) {
+      *tmpcn = *tmpx++ + *tmpt2++ + u;
+      u = *tmpcn >> ((mp_digit)DIGIT_BIT);
+      *tmpcn++ &= MP_MASK;
+    }
+    if (min != max) {
+      for (; i < max; i++) {
+        *tmpcn = x.dp[i] + u;
+        u = *tmpcn >> ((mp_digit)DIGIT_BIT);
+        *tmpcn++ &= MP_MASK;
+      }
+    }
+    *tmpcn++ = u;
+    
+    bn -= r;
+    n += r;
+  }
+  mp_clamp(c);
+  return MP_OKAY;
+ERR:
+  return err;
+}
+#endif
+
+/* $Source$ */
+/* $Revision$ */
+/* $Date$ */
diff --git a/bn_mp_mul.c b/bn_mp_mul.c
index 64e32cc..4fdf5ac 100644
--- a/bn_mp_mul.c
+++ b/bn_mp_mul.c
@@ -21,15 +21,27 @@ int mp_mul (mp_int * a, mp_int * b, mp_int * c)
   int     res, neg;
   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
 
+  int     an, bn, tn;
+  mp_int * t;
+  an = a -> used;
+  bn = b -> used;
+  if (an > bn) {
+    tn = an; an = bn; bn = tn;
+    t = a; a = b; b = t;
+  }
+  /* now a->used <= b->used */
+
   /* use Toom-Cook? */
 #ifdef BN_MP_TOOM_MUL_C
-  if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
+  if (a->used >= TOOM_MUL_CUTOFF) {
+    if (2 * an <= bn) goto balance;
     res = mp_toom_mul(a, b, c);
   } else 
 #endif
 #ifdef BN_MP_KARATSUBA_MUL_C
   /* use Karatsuba? */
-  if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
+  if (a->used >= KARATSUBA_MUL_CUTOFF) {
+    if (2 * an <= bn) goto balance;
     res = mp_karatsuba_mul (a, b, c);
   } else 
 #endif
@@ -44,8 +56,7 @@ int mp_mul (mp_int * a, mp_int * b, mp_int * c)
 
 #ifdef BN_FAST_S_MP_MUL_DIGS_C
     if ((digs < MP_WARRAY) &&
-        MIN(a->used, b->used) <= 
-        (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+        a->used <= (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
       res = fast_s_mp_mul_digs (a, b, c, digs);
     } else 
 #endif
@@ -56,8 +67,17 @@ int mp_mul (mp_int * a, mp_int * b, mp_int * c)
 #endif
 
   }
+ret:
   c->sign = (c->used > 0) ? neg : MP_ZPOS;
   return res;
+
+balance:
+  /* if a is much smaller than b
+   * use balance multiplication
+   * (the idea is taken from Ruby core)
+   */
+  res = mp_balance_mul(a, b, c);
+  goto ret;
 }
 #endif
 
diff --git a/makefile b/makefile
index 3ecb3be..385ad3a 100644
--- a/makefile
+++ b/makefile
@@ -93,7 +93,8 @@ bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \
 bn_mp_init_multi.o bn_mp_clear_multi.o bn_mp_exteuclid.o bn_mp_toradix_n.o \
 bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \
 bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o \
-bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin_n.o bn_mp_import.o bn_mp_export.o
+bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin_n.o bn_mp_import.o bn_mp_export.o \
+bn_mp_balance_mul.o
 
 $(LIBNAME):  $(OBJECTS)
 	$(AR) $(ARFLAGS) $@ $(OBJECTS)
diff --git a/tommath.h b/tommath.h
index aaae2d2..055a4e0 100644
--- a/tommath.h
+++ b/tommath.h
@@ -564,6 +564,7 @@ int fast_s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs);
 int s_mp_mul_high_digs(mp_int *a, mp_int *b, mp_int *c, int digs);
 int fast_s_mp_sqr(mp_int *a, mp_int *b);
 int s_mp_sqr(mp_int *a, mp_int *b);
+int mp_balance_mul(mp_int *a, mp_int *b, mp_int *c);
 int mp_karatsuba_mul(mp_int *a, mp_int *b, mp_int *c);
 int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c);
 int mp_karatsuba_sqr(mp_int *a, mp_int *b);
diff --git a/tommath_class.h b/tommath_class.h
index 958b36e..36e5962 100644
--- a/tommath_class.h
+++ b/tommath_class.h
@@ -20,6 +20,7 @@
 #define BN_MP_ADD_D_C
 #define BN_MP_ADDMOD_C
 #define BN_MP_AND_C
+#define BN_MP_BALANCE_MUL_C
 #define BN_MP_CLAMP_C
 #define BN_MP_CLEAR_C
 #define BN_MP_CLEAR_MULTI_C
@@ -568,6 +569,7 @@
 #endif
 
 #if defined(BN_MP_MUL_C)
+   #define BN_MP_BALANCE_MUL_C
    #define BN_MP_TOOM_MUL_C
    #define BN_MP_KARATSUBA_MUL_C
    #define BN_FAST_S_MP_MUL_DIGS_C