added libtommath-0.38

This commit is contained in:
Tom St Denis 2006-01-26 03:07:36 +00:00 committed by Steffen Jaeckel
parent fde740eae0
commit 21adca01da
12 changed files with 55 additions and 68 deletions

BIN
bn.pdf

Binary file not shown.

2
bn.tex
View File

@ -49,7 +49,7 @@
\begin{document}
\frontmatter
\pagestyle{empty}
\title{LibTomMath User Manual \\ v0.37}
\title{LibTomMath User Manual \\ v0.38}
\author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle
This text, the library and the accompanying textbook are all hereby placed in the public domain. This book has been

View File

@ -78,10 +78,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* make next carry */
_W = _W >> ((mp_word)DIGIT_BIT);
}
/* store final carry */
W[ix] = (mp_digit)(_W & MP_MASK);
}
/* setup dest */
olduse = c->used;

View File

@ -70,9 +70,6 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
_W = _W >> ((mp_word)DIGIT_BIT);
}
/* store final carry */
W[ix] = (mp_digit)(_W & MP_MASK);
/* setup dest */
olduse = c->used;
c->used = pa;

View File

@ -1,3 +1,7 @@
Jan 26th, 2006
v0.38 -- broken makefile.shared fixed
-- removed some carry stores that were not required [updated text]
November 18th, 2005
v0.37 -- [Don Porter] reported on a TCL list [HEY SEND ME BUGREPORTS ALREADY!!!] that mp_add_d() would compute -0 with some inputs. Fixed.
-- [rinick@gmail.com] reported the makefile.bcc was messed up. Fixed.

View File

@ -3,7 +3,7 @@
#Tom St Denis
#version of library
VERSION=0.37
VERSION=0.38
CFLAGS += -I./ -Wall -W -Wshadow -Wsign-compare

View File

@ -1,7 +1,7 @@
#Makefile for GCC
#
#Tom St Denis
VERSION=0:37
VERSION=0:38
CC = libtool --mode=compile gcc

Binary file not shown.

View File

@ -458,10 +458,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* make next carry */
_W = _W >> ((mp_word)DIGIT_BIT);
}
/* store final carry */
W[ix] = (mp_digit)(_W & MP_MASK);
}
/* setup dest */
olduse = c->used;
@ -564,9 +561,6 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
_W = _W >> ((mp_word)DIGIT_BIT);
}
/* store final carry */
W[ix] = (mp_digit)(_W & MP_MASK);
/* setup dest */
olduse = c->used;
c->used = pa;

Binary file not shown.

View File

@ -66,7 +66,7 @@ QUALCOMM Australia \\
}
}
\maketitle
This text has been placed in the public domain. This text corresponds to the v0.37 release of the
This text has been placed in the public domain. This text corresponds to the v0.38 release of the
LibTomMath project.
\begin{alltt}
@ -77,7 +77,7 @@ K2L 1C3
Canada
Phone: 1-613-836-3160
Email: tomstdenis@iahu.ca
Email: tomstdenis@gmail.com
\end{alltt}
This text is formatted to the international B5 paper size of 176mm wide by 250mm tall using the \LaTeX{}
@ -2190,7 +2190,7 @@ left.
After the digits have been shifted appropriately at most $lg(\beta) - 1$ shifts are left to perform. Step 5 calculates the number of remaining shifts
required. If it is non-zero a modified shift loop is used to calculate the remaining product.
Essentially the loop is a generic version of algorith mp\_mul2 designed to handle any shift count in the range $1 \le x < lg(\beta)$. The $mask$
Essentially the loop is a generic version of algorithm mp\_mul\_2 designed to handle any shift count in the range $1 \le x < lg(\beta)$. The $mask$
variable is used to extract the upper $d$ bits to form the carry for the next iteration.
This algorithm is loosely measured as a $O(2n)$ algorithm which means that if the input is $n$-digits that it takes $2n$ ``time'' to
@ -2611,17 +2611,16 @@ Place an array of \textbf{MP\_WARRAY} single precision digits named $W$ on the s
\hspace{6mm}5.4.1 $\_ \hat W \leftarrow \_ \hat W + a_{tx+iy}b_{ty-iy}$ \\
\hspace{3mm}5.5 $W_{ix} \leftarrow \_ \hat W (\mbox{mod }\beta)$\\
\hspace{3mm}5.6 $\_ \hat W \leftarrow \lfloor \_ \hat W / \beta \rfloor$ \\
6. $W_{pa} \leftarrow \_ \hat W (\mbox{mod }\beta)$ \\
\\
7. $oldused \leftarrow c.used$ \\
8. $c.used \leftarrow digs$ \\
9. for $ix$ from $0$ to $pa$ do \\
\hspace{3mm}9.1 $c_{ix} \leftarrow W_{ix}$ \\
10. for $ix$ from $pa + 1$ to $oldused - 1$ do \\
\hspace{3mm}10.1 $c_{ix} \leftarrow 0$ \\
6. $oldused \leftarrow c.used$ \\
7. $c.used \leftarrow digs$ \\
8. for $ix$ from $0$ to $pa$ do \\
\hspace{3mm}8.1 $c_{ix} \leftarrow W_{ix}$ \\
9. for $ix$ from $pa + 1$ to $oldused - 1$ do \\
\hspace{3mm}9.1 $c_{ix} \leftarrow 0$ \\
\\
11. Clamp $c$. \\
12. Return MP\_OKAY. \\
10. Clamp $c$. \\
11. Return MP\_OKAY. \\
\hline
\end{tabular}
\end{center}

View File

@ -66,7 +66,7 @@ QUALCOMM Australia \\
}
}
\maketitle
This text has been placed in the public domain. This text corresponds to the v0.37 release of the
This text has been placed in the public domain. This text corresponds to the v0.38 release of the
LibTomMath project.
\begin{alltt}
@ -77,7 +77,7 @@ K2L 1C3
Canada
Phone: 1-613-836-3160
Email: tomstdenis@iahu.ca
Email: tomstdenis@gmail.com
\end{alltt}
This text is formatted to the international B5 paper size of 176mm wide by 250mm tall using the \LaTeX{}
@ -3169,7 +3169,7 @@ left.
After the digits have been shifted appropriately at most $lg(\beta) - 1$ shifts are left to perform. Step 5 calculates the number of remaining shifts
required. If it is non-zero a modified shift loop is used to calculate the remaining product.
Essentially the loop is a generic version of algorith mp\_mul2 designed to handle any shift count in the range $1 \le x < lg(\beta)$. The $mask$
Essentially the loop is a generic version of algorithm mp\_mul\_2 designed to handle any shift count in the range $1 \le x < lg(\beta)$. The $mask$
variable is used to extract the upper $d$ bits to form the carry for the next iteration.
This algorithm is loosely measured as a $O(2n)$ algorithm which means that if the input is $n$-digits that it takes $2n$ ``time'' to
@ -3864,17 +3864,16 @@ Place an array of \textbf{MP\_WARRAY} single precision digits named $W$ on the s
\hspace{6mm}5.4.1 $\_ \hat W \leftarrow \_ \hat W + a_{tx+iy}b_{ty-iy}$ \\
\hspace{3mm}5.5 $W_{ix} \leftarrow \_ \hat W (\mbox{mod }\beta)$\\
\hspace{3mm}5.6 $\_ \hat W \leftarrow \lfloor \_ \hat W / \beta \rfloor$ \\
6. $W_{pa} \leftarrow \_ \hat W (\mbox{mod }\beta)$ \\
\\
7. $oldused \leftarrow c.used$ \\
8. $c.used \leftarrow digs$ \\
9. for $ix$ from $0$ to $pa$ do \\
\hspace{3mm}9.1 $c_{ix} \leftarrow W_{ix}$ \\
10. for $ix$ from $pa + 1$ to $oldused - 1$ do \\
\hspace{3mm}10.1 $c_{ix} \leftarrow 0$ \\
6. $oldused \leftarrow c.used$ \\
7. $c.used \leftarrow digs$ \\
8. for $ix$ from $0$ to $pa$ do \\
\hspace{3mm}8.1 $c_{ix} \leftarrow W_{ix}$ \\
9. for $ix$ from $pa + 1$ to $oldused - 1$ do \\
\hspace{3mm}9.1 $c_{ix} \leftarrow 0$ \\
\\
11. Clamp $c$. \\
12. Return MP\_OKAY. \\
10. Clamp $c$. \\
11. Return MP\_OKAY. \\
\hline
\end{tabular}
\end{center}
@ -3977,33 +3976,30 @@ and addition operations in the nested loop in parallel.
077
078 /* make next carry */
079 _W = _W >> ((mp_word)DIGIT_BIT);
080 \}
080 \}
081
082 /* store final carry */
083 W[ix] = (mp_digit)(_W & MP_MASK);
084
085 /* setup dest */
086 olduse = c->used;
087 c->used = pa;
088
089 \{
090 register mp_digit *tmpc;
091 tmpc = c->dp;
092 for (ix = 0; ix < pa+1; ix++) \{
093 /* now extract the previous digit [below the carry] */
094 *tmpc++ = W[ix];
095 \}
096
097 /* clear unused digits [that existed in the old copy of c] */
098 for (; ix < olduse; ix++) \{
099 *tmpc++ = 0;
100 \}
101 \}
102 mp_clamp (c);
103 return MP_OKAY;
104 \}
105 #endif
106
082 /* setup dest */
083 olduse = c->used;
084 c->used = pa;
085
086 \{
087 register mp_digit *tmpc;
088 tmpc = c->dp;
089 for (ix = 0; ix < pa+1; ix++) \{
090 /* now extract the previous digit [below the carry] */
091 *tmpc++ = W[ix];
092 \}
093
094 /* clear unused digits [that existed in the old copy of c] */
095 for (; ix < olduse; ix++) \{
096 *tmpc++ = 0;
097 \}
098 \}
099 mp_clamp (c);
100 return MP_OKAY;
101 \}
102 #endif
103
\end{alltt}
\end{small}
@ -4020,7 +4016,7 @@ slower and also often doesn't exist. This new algorithm only performs two reads
compiler has aliased $\_ \hat W$ to a CPU register.
After the inner loop we store the current accumulator in $W$ and shift $\_ \hat W$ (lines 76, 79) to forward it as
a carry for the next pass. After the outer loop we use the final carry (line 83) as the last digit of the product.
a carry for the next pass. After the outer loop we use the final carry (line 76) as the last digit of the product.
\subsection{Polynomial Basis Multiplication}
To break the $O(n^2)$ barrier in multiplication requires a completely different look at integer multiplication. In the following algorithms