759a926cfe
gitignore: add libtool output booker.pl: fix source code parsing and trim trailing spaces demo/demo: fix compiler warning, add informational output demo/timing & etc/tune: fix TIMFUNC() makefile: minor changes makefile.shared: increase version, add missing mp_balance_mul tommath: make sure that DIGIT_BIT is correct
146 lines
2.9 KiB
C
146 lines
2.9 KiB
C
/* Tune the Karatsuba parameters
|
|
*
|
|
* Tom St Denis, tomstdenis@gmail.com
|
|
*/
|
|
#include <tommath.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
|
|
|
|
/* Number of times each multiplication/squaring is repeated per operand size
 * while it is being timed. The right value depends on the machine: slower
 * machines can use a smaller count, faster ones a larger count. (The original
 * note suggested 5 or 10 for slow computers and 25 - 50 or so for fast ones
 * [re: Athlon]; the value below is 2^14.)
 */
#define TIMES (1UL << 14)
|
|
|
|
#ifndef X86_TIMER
|
|
|
|
/* RDTSC from Scott Duplichan */
|
|
static ulong64 TIMFUNC (void)
|
|
{
|
|
#if defined __GNUC__
|
|
#if defined(__i386__) || defined(__x86_64__)
|
|
/* version from http://www.mcs.anl.gov/~kazutomo/rdtsc.html
|
|
* the old code always got a warning issued by gcc, clang did not complain...
|
|
*/
|
|
unsigned hi, lo;
|
|
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
|
|
return ((ulong64)lo)|( ((ulong64)hi)<<32);
|
|
#else /* gcc-IA64 version */
|
|
unsigned long result;
|
|
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
|
|
while (__builtin_expect ((int) result == -1, 0))
|
|
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
|
|
return result;
|
|
#endif
|
|
|
|
// Microsoft and Intel Windows compilers
|
|
#elif defined _M_IX86
|
|
__asm rdtsc
|
|
#elif defined _M_AMD64
|
|
return __rdtsc ();
|
|
#elif defined _M_IA64
|
|
#if defined __INTEL_COMPILER
|
|
#include <ia64intrin.h>
|
|
#endif
|
|
return __getReg (3116);
|
|
#else
|
|
#error need rdtsc function for this build
|
|
#endif
|
|
}
|
|
|
|
|
|
/* generic ISO C timer */
|
|
ulong64 LBL_T;
|
|
void t_start(void) { LBL_T = TIMFUNC(); }
|
|
ulong64 t_read(void) { return TIMFUNC() - LBL_T; }
|
|
|
|
#else
|
|
extern void t_start(void);
|
|
extern ulong64 t_read(void);
|
|
#endif
|
|
|
|
ulong64 time_mult(int size, int s)
|
|
{
|
|
unsigned long x;
|
|
mp_int a, b, c;
|
|
ulong64 t1;
|
|
|
|
mp_init (&a);
|
|
mp_init (&b);
|
|
mp_init (&c);
|
|
|
|
mp_rand (&a, size);
|
|
mp_rand (&b, size);
|
|
|
|
if (s == 1) {
|
|
KARATSUBA_MUL_CUTOFF = size;
|
|
} else {
|
|
KARATSUBA_MUL_CUTOFF = 100000;
|
|
}
|
|
|
|
t_start();
|
|
for (x = 0; x < TIMES; x++) {
|
|
mp_mul(&a,&b,&c);
|
|
}
|
|
t1 = t_read();
|
|
mp_clear (&a);
|
|
mp_clear (&b);
|
|
mp_clear (&c);
|
|
return t1;
|
|
}
|
|
|
|
ulong64 time_sqr(int size, int s)
|
|
{
|
|
unsigned long x;
|
|
mp_int a, b;
|
|
ulong64 t1;
|
|
|
|
mp_init (&a);
|
|
mp_init (&b);
|
|
|
|
mp_rand (&a, size);
|
|
|
|
if (s == 1) {
|
|
KARATSUBA_SQR_CUTOFF = size;
|
|
} else {
|
|
KARATSUBA_SQR_CUTOFF = 100000;
|
|
}
|
|
|
|
t_start();
|
|
for (x = 0; x < TIMES; x++) {
|
|
mp_sqr(&a,&b);
|
|
}
|
|
t1 = t_read();
|
|
mp_clear (&a);
|
|
mp_clear (&b);
|
|
return t1;
|
|
}
|
|
|
|
int
|
|
main (void)
|
|
{
|
|
ulong64 t1, t2;
|
|
int x, y;
|
|
|
|
for (x = 8; ; x += 2) {
|
|
t1 = time_mult(x, 0);
|
|
t2 = time_mult(x, 1);
|
|
printf("%d: %9llu %9llu, %9llu\n", x, t1, t2, t2 - t1);
|
|
if (t2 < t1) break;
|
|
}
|
|
y = x;
|
|
|
|
for (x = 8; ; x += 2) {
|
|
t1 = time_sqr(x, 0);
|
|
t2 = time_sqr(x, 1);
|
|
printf("%d: %9llu %9llu, %9llu\n", x, t1, t2, t2 - t1);
|
|
if (t2 < t1) break;
|
|
}
|
|
printf("KARATSUBA_MUL_CUTOFF = %d\n", y);
|
|
printf("KARATSUBA_SQR_CUTOFF = %d\n", x);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* $Source$ */
|
|
/* $Revision$ */
|
|
/* $Date$ */
|