added libtommath-0.38

This commit is contained in:
Tom St Denis 2006-01-26 03:07:36 +00:00 committed by Steffen Jaeckel
parent fde740eae0
commit 21adca01da
12 changed files with 55 additions and 68 deletions

BIN
bn.pdf

Binary file not shown.

2
bn.tex
View File

@ -49,7 +49,7 @@
\begin{document} \begin{document}
\frontmatter \frontmatter
\pagestyle{empty} \pagestyle{empty}
\title{LibTomMath User Manual \\ v0.37} \title{LibTomMath User Manual \\ v0.38}
\author{Tom St Denis \\ tomstdenis@iahu.ca} \author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle \maketitle
This text, the library and the accompanying textbook are all hereby placed in the public domain. This book has been This text, the library and the accompanying textbook are all hereby placed in the public domain. This book has been

View File

@ -78,10 +78,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* make next carry */ /* make next carry */
_W = _W >> ((mp_word)DIGIT_BIT); _W = _W >> ((mp_word)DIGIT_BIT);
} }
/* store final carry */
W[ix] = (mp_digit)(_W & MP_MASK);
/* setup dest */ /* setup dest */
olduse = c->used; olduse = c->used;

View File

@ -70,9 +70,6 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
_W = _W >> ((mp_word)DIGIT_BIT); _W = _W >> ((mp_word)DIGIT_BIT);
} }
/* store final carry */
W[ix] = (mp_digit)(_W & MP_MASK);
/* setup dest */ /* setup dest */
olduse = c->used; olduse = c->used;
c->used = pa; c->used = pa;

View File

@ -1,3 +1,7 @@
Jan 26th, 2006
v0.38 -- broken makefile.shared fixed
-- removed some carry stores that were not required [updated text]
November 18th, 2005 November 18th, 2005
v0.37 -- [Don Porter] reported on a TCL list [HEY SEND ME BUGREPORTS ALREADY!!!] that mp_add_d() would compute -0 with some inputs. Fixed. v0.37 -- [Don Porter] reported on a TCL list [HEY SEND ME BUGREPORTS ALREADY!!!] that mp_add_d() would compute -0 with some inputs. Fixed.
-- [rinick@gmail.com] reported the makefile.bcc was messed up. Fixed. -- [rinick@gmail.com] reported the makefile.bcc was messed up. Fixed.

View File

@ -3,7 +3,7 @@
#Tom St Denis #Tom St Denis
#version of library #version of library
VERSION=0.37 VERSION=0.38
CFLAGS += -I./ -Wall -W -Wshadow -Wsign-compare CFLAGS += -I./ -Wall -W -Wshadow -Wsign-compare

View File

@ -1,7 +1,7 @@
#Makefile for GCC #Makefile for GCC
# #
#Tom St Denis #Tom St Denis
VERSION=0:37 VERSION=0:38
CC = libtool --mode=compile gcc CC = libtool --mode=compile gcc

Binary file not shown.

View File

@ -458,10 +458,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* make next carry */ /* make next carry */
_W = _W >> ((mp_word)DIGIT_BIT); _W = _W >> ((mp_word)DIGIT_BIT);
} }
/* store final carry */
W[ix] = (mp_digit)(_W & MP_MASK);
/* setup dest */ /* setup dest */
olduse = c->used; olduse = c->used;
@ -564,9 +561,6 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
_W = _W >> ((mp_word)DIGIT_BIT); _W = _W >> ((mp_word)DIGIT_BIT);
} }
/* store final carry */
W[ix] = (mp_digit)(_W & MP_MASK);
/* setup dest */ /* setup dest */
olduse = c->used; olduse = c->used;
c->used = pa; c->used = pa;

Binary file not shown.

View File

@ -66,7 +66,7 @@ QUALCOMM Australia \\
} }
} }
\maketitle \maketitle
This text has been placed in the public domain. This text corresponds to the v0.37 release of the This text has been placed in the public domain. This text corresponds to the v0.38 release of the
LibTomMath project. LibTomMath project.
\begin{alltt} \begin{alltt}
@ -77,7 +77,7 @@ K2L 1C3
Canada Canada
Phone: 1-613-836-3160 Phone: 1-613-836-3160
Email: tomstdenis@iahu.ca Email: tomstdenis@gmail.com
\end{alltt} \end{alltt}
This text is formatted to the international B5 paper size of 176mm wide by 250mm tall using the \LaTeX{} This text is formatted to the international B5 paper size of 176mm wide by 250mm tall using the \LaTeX{}
@ -2190,7 +2190,7 @@ left.
After the digits have been shifted appropriately at most $lg(\beta) - 1$ shifts are left to perform. Step 5 calculates the number of remaining shifts After the digits have been shifted appropriately at most $lg(\beta) - 1$ shifts are left to perform. Step 5 calculates the number of remaining shifts
required. If it is non-zero a modified shift loop is used to calculate the remaining product. required. If it is non-zero a modified shift loop is used to calculate the remaining product.
Essentially the loop is a generic version of algorith mp\_mul2 designed to handle any shift count in the range $1 \le x < lg(\beta)$. The $mask$ Essentially the loop is a generic version of algorithm mp\_mul\_2 designed to handle any shift count in the range $1 \le x < lg(\beta)$. The $mask$
variable is used to extract the upper $d$ bits to form the carry for the next iteration. variable is used to extract the upper $d$ bits to form the carry for the next iteration.
This algorithm is loosely measured as a $O(2n)$ algorithm which means that if the input is $n$-digits that it takes $2n$ ``time'' to This algorithm is loosely measured as a $O(2n)$ algorithm which means that if the input is $n$-digits that it takes $2n$ ``time'' to
@ -2611,17 +2611,16 @@ Place an array of \textbf{MP\_WARRAY} single precision digits named $W$ on the s
\hspace{6mm}5.4.1 $\_ \hat W \leftarrow \_ \hat W + a_{tx+iy}b_{ty-iy}$ \\ \hspace{6mm}5.4.1 $\_ \hat W \leftarrow \_ \hat W + a_{tx+iy}b_{ty-iy}$ \\
\hspace{3mm}5.5 $W_{ix} \leftarrow \_ \hat W (\mbox{mod }\beta)$\\ \hspace{3mm}5.5 $W_{ix} \leftarrow \_ \hat W (\mbox{mod }\beta)$\\
\hspace{3mm}5.6 $\_ \hat W \leftarrow \lfloor \_ \hat W / \beta \rfloor$ \\ \hspace{3mm}5.6 $\_ \hat W \leftarrow \lfloor \_ \hat W / \beta \rfloor$ \\
6. $W_{pa} \leftarrow \_ \hat W (\mbox{mod }\beta)$ \\
\\ \\
7. $oldused \leftarrow c.used$ \\ 6. $oldused \leftarrow c.used$ \\
8. $c.used \leftarrow digs$ \\ 7. $c.used \leftarrow digs$ \\
9. for $ix$ from $0$ to $pa$ do \\ 8. for $ix$ from $0$ to $pa$ do \\
\hspace{3mm}9.1 $c_{ix} \leftarrow W_{ix}$ \\ \hspace{3mm}8.1 $c_{ix} \leftarrow W_{ix}$ \\
10. for $ix$ from $pa + 1$ to $oldused - 1$ do \\ 9. for $ix$ from $pa + 1$ to $oldused - 1$ do \\
\hspace{3mm}10.1 $c_{ix} \leftarrow 0$ \\ \hspace{3mm}9.1 $c_{ix} \leftarrow 0$ \\
\\ \\
11. Clamp $c$. \\ 10. Clamp $c$. \\
12. Return MP\_OKAY. \\ 11. Return MP\_OKAY. \\
\hline \hline
\end{tabular} \end{tabular}
\end{center} \end{center}

View File

@ -66,7 +66,7 @@ QUALCOMM Australia \\
} }
} }
\maketitle \maketitle
This text has been placed in the public domain. This text corresponds to the v0.37 release of the This text has been placed in the public domain. This text corresponds to the v0.38 release of the
LibTomMath project. LibTomMath project.
\begin{alltt} \begin{alltt}
@ -77,7 +77,7 @@ K2L 1C3
Canada Canada
Phone: 1-613-836-3160 Phone: 1-613-836-3160
Email: tomstdenis@iahu.ca Email: tomstdenis@gmail.com
\end{alltt} \end{alltt}
This text is formatted to the international B5 paper size of 176mm wide by 250mm tall using the \LaTeX{} This text is formatted to the international B5 paper size of 176mm wide by 250mm tall using the \LaTeX{}
@ -3169,7 +3169,7 @@ left.
After the digits have been shifted appropriately at most $lg(\beta) - 1$ shifts are left to perform. Step 5 calculates the number of remaining shifts After the digits have been shifted appropriately at most $lg(\beta) - 1$ shifts are left to perform. Step 5 calculates the number of remaining shifts
required. If it is non-zero a modified shift loop is used to calculate the remaining product. required. If it is non-zero a modified shift loop is used to calculate the remaining product.
Essentially the loop is a generic version of algorith mp\_mul2 designed to handle any shift count in the range $1 \le x < lg(\beta)$. The $mask$ Essentially the loop is a generic version of algorithm mp\_mul\_2 designed to handle any shift count in the range $1 \le x < lg(\beta)$. The $mask$
variable is used to extract the upper $d$ bits to form the carry for the next iteration. variable is used to extract the upper $d$ bits to form the carry for the next iteration.
This algorithm is loosely measured as a $O(2n)$ algorithm which means that if the input is $n$-digits that it takes $2n$ ``time'' to This algorithm is loosely measured as a $O(2n)$ algorithm which means that if the input is $n$-digits that it takes $2n$ ``time'' to
@ -3864,17 +3864,16 @@ Place an array of \textbf{MP\_WARRAY} single precision digits named $W$ on the s
\hspace{6mm}5.4.1 $\_ \hat W \leftarrow \_ \hat W + a_{tx+iy}b_{ty-iy}$ \\ \hspace{6mm}5.4.1 $\_ \hat W \leftarrow \_ \hat W + a_{tx+iy}b_{ty-iy}$ \\
\hspace{3mm}5.5 $W_{ix} \leftarrow \_ \hat W (\mbox{mod }\beta)$\\ \hspace{3mm}5.5 $W_{ix} \leftarrow \_ \hat W (\mbox{mod }\beta)$\\
\hspace{3mm}5.6 $\_ \hat W \leftarrow \lfloor \_ \hat W / \beta \rfloor$ \\ \hspace{3mm}5.6 $\_ \hat W \leftarrow \lfloor \_ \hat W / \beta \rfloor$ \\
6. $W_{pa} \leftarrow \_ \hat W (\mbox{mod }\beta)$ \\
\\ \\
7. $oldused \leftarrow c.used$ \\ 6. $oldused \leftarrow c.used$ \\
8. $c.used \leftarrow digs$ \\ 7. $c.used \leftarrow digs$ \\
9. for $ix$ from $0$ to $pa$ do \\ 8. for $ix$ from $0$ to $pa$ do \\
\hspace{3mm}9.1 $c_{ix} \leftarrow W_{ix}$ \\ \hspace{3mm}8.1 $c_{ix} \leftarrow W_{ix}$ \\
10. for $ix$ from $pa + 1$ to $oldused - 1$ do \\ 9. for $ix$ from $pa + 1$ to $oldused - 1$ do \\
\hspace{3mm}10.1 $c_{ix} \leftarrow 0$ \\ \hspace{3mm}9.1 $c_{ix} \leftarrow 0$ \\
\\ \\
11. Clamp $c$. \\ 10. Clamp $c$. \\
12. Return MP\_OKAY. \\ 11. Return MP\_OKAY. \\
\hline \hline
\end{tabular} \end{tabular}
\end{center} \end{center}
@ -3977,33 +3976,30 @@ and addition operations in the nested loop in parallel.
077 077
078 /* make next carry */ 078 /* make next carry */
079 _W = _W >> ((mp_word)DIGIT_BIT); 079 _W = _W >> ((mp_word)DIGIT_BIT);
080 \} 080 \}
081 081
082 /* store final carry */ 082 /* setup dest */
083 W[ix] = (mp_digit)(_W & MP_MASK); 083 olduse = c->used;
084 084 c->used = pa;
085 /* setup dest */ 085
086 olduse = c->used; 086 \{
087 c->used = pa; 087 register mp_digit *tmpc;
088 088 tmpc = c->dp;
089 \{ 089 for (ix = 0; ix < pa+1; ix++) \{
090 register mp_digit *tmpc; 090 /* now extract the previous digit [below the carry] */
091 tmpc = c->dp; 091 *tmpc++ = W[ix];
092 for (ix = 0; ix < pa+1; ix++) \{ 092 \}
093 /* now extract the previous digit [below the carry] */ 093
094 *tmpc++ = W[ix]; 094 /* clear unused digits [that existed in the old copy of c] */
095 \} 095 for (; ix < olduse; ix++) \{
096 096 *tmpc++ = 0;
097 /* clear unused digits [that existed in the old copy of c] */ 097 \}
098 for (; ix < olduse; ix++) \{ 098 \}
099 *tmpc++ = 0; 099 mp_clamp (c);
100 \} 100 return MP_OKAY;
101 \} 101 \}
102 mp_clamp (c); 102 #endif
103 return MP_OKAY; 103
104 \}
105 #endif
106
\end{alltt} \end{alltt}
\end{small} \end{small}
@ -4020,7 +4016,7 @@ slower and also often doesn't exist. This new algorithm only performs two reads
compiler has aliased $\_ \hat W$ to a CPU register. compiler has aliased $\_ \hat W$ to a CPU register.
After the inner loop we store the current accumulator in $W$ and shift $\_ \hat W$ (lines 76, 79) to forward it as After the inner loop we store the current accumulator in $W$ and shift $\_ \hat W$ (lines 76, 79) to forward it as
a carry for the next pass. After the outer loop we use the final carry (line 83) as the last digit of the product. a carry for the next pass. After the outer loop we use the final carry (line 76) as the last digit of the product.
\subsection{Polynomial Basis Multiplication} \subsection{Polynomial Basis Multiplication}
To break the $O(n^2)$ barrier in multiplication requires a completely different look at integer multiplication. In the following algorithms To break the $O(n^2)$ barrier in multiplication requires a completely different look at integer multiplication. In the following algorithms