diff --git a/aes.c b/aes.c index e0e4c96..949c22b 100644 --- a/aes.c +++ b/aes.c @@ -89,7 +89,7 @@ static ulong32 setup_mix(ulong32 temp) } #ifndef ENCRYPT_ONLY - +#ifdef SMALL_CODE static ulong32 setup_mix2(ulong32 temp) { return Td0(255 & Te4[byte(temp, 3)]) ^ @@ -97,7 +97,7 @@ static ulong32 setup_mix2(ulong32 temp) Td2(255 & Te4[byte(temp, 1)]) ^ Td3(255 & Te4[byte(temp, 0)]); } - +#endif #endif int SETUP(const unsigned char *key, int keylen, int rounds, symmetric_key *skey) diff --git a/changes b/changes index d29871b..e74b1d8 100644 --- a/changes +++ b/changes @@ -1,3 +1,26 @@ +July 23rd, 2004 +v0.97b -- Added PKCS #1 v1.5 RSA encrypt/sign helpers (like rsa_sign_hash, etc...) + -- Added missing prng check to rsa_decrypt_key() [not critical as I don't use + descriptors directly in that function] + -- Merged in LTM-SSE, define LTMSSE before you build and you will get SSE2 optimized math ;-) + (roughly 3x faster on a P4 Northwood). By default it will compile as ISO C portable + code (when LTMSSE is undefined). + -- Fixed bug in ltc_tommath.h where I had the kara/toom cutoffs not marked as ``extern'' + Thanks to "Stefan Arentz" + -- Steven Dake and Richard Amacker submitted patches to + fix pkcs_5_2(). It now matches the output of another crypto library. Whoops... hehehe + -- Updated PRNG api. Added Fortuna PRNG to the list of supported PRNGs + -- Fixed up the descriptor tables since globals are automatically zero'ed on startup. + -- Changed RC4 to store its output. If you want to encrypt with RC4 + you'll have to do the XOR yourself. + -- Fixed buffer overflows/overruns in the HMAC code. + + ++ API change for the PRNGs: there is now a done() function per PRNG. You + should call it when you are done with a prng state. So far it's + not absolutely required (won't cause problems) but is a good idea to + start. + + June 23rd, 2004 v0.97a ++ Fixed several potentially crippling bugs... 
[read on] -- Fixed bug in OAEP decoder that would incorrectly report diff --git a/crypt.c b/crypt.c index e6800fd..d71066c 100644 --- a/crypt.c +++ b/crypt.c @@ -229,9 +229,6 @@ const char *crypt_build_settings = #endif #if defined(NO_FILE) " NO_FILE " -#endif -#if defined(LTC_TEST) - " LTC_TEST " #endif "\n" "\n\n\n" diff --git a/crypt.tex b/crypt.tex index e3b40af..4b3d103 100644 --- a/crypt.tex +++ b/crypt.tex @@ -47,7 +47,7 @@ \def\gap{\vspace{0.5ex}} \makeindex \begin{document} -\title{LibTomCrypt \\ Version 0.97a} +\title{LibTomCrypt \\ Version 0.97b} \author{Tom St Denis \\ \\ tomstdenis@iahu.ca \\ diff --git a/crypt_cipher_descriptor.c b/crypt_cipher_descriptor.c index fd69d5b..4a8a943 100644 --- a/crypt_cipher_descriptor.c +++ b/crypt_cipher_descriptor.c @@ -10,37 +10,5 @@ */ #include "mycrypt.h" -struct _cipher_descriptor cipher_descriptor[TAB_SIZE] = { -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ 
NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL } }; +struct _cipher_descriptor cipher_descriptor[TAB_SIZE]; diff --git a/crypt_hash_descriptor.c b/crypt_hash_descriptor.c index 1048be8..5c02255 100644 --- a/crypt_hash_descriptor.c +++ b/crypt_hash_descriptor.c @@ -10,36 +10,5 @@ */ #include "mycrypt.h" -struct _hash_descriptor hash_descriptor[TAB_SIZE] = { -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, 
NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL }, -{ NULL, 0, 0, 0, { 0x00 }, 0, NULL, NULL, NULL, NULL } }; +struct _hash_descriptor hash_descriptor[TAB_SIZE]; + diff --git a/crypt_prng_descriptor.c b/crypt_prng_descriptor.c index 593516d..129118f 100644 --- a/crypt_prng_descriptor.c +++ b/crypt_prng_descriptor.c @@ -10,37 +10,4 @@ */ #include "mycrypt.h" -struct _prng_descriptor prng_descriptor[TAB_SIZE] = { -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, 
NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL }, -{ NULL, NULL, NULL, NULL, NULL } }; - +struct _prng_descriptor prng_descriptor[TAB_SIZE]; diff --git a/demos/test/rsa_test.c b/demos/test/rsa_test.c index 3e72e44..777fc87 100644 --- a/demos/test/rsa_test.c +++ b/demos/test/rsa_test.c @@ -21,6 +21,35 @@ int rsa_test(void) /* make a random key */ DO(rsa_make_key(&test_yarrow, prng_idx, 1024/8, 65537, &key)); + /* test PKCS #1 v1.5 */ + for (rsa_msgsize = 1; rsa_msgsize <= 117; rsa_msgsize++) { + /* make a random key/msg */ + yarrow_read(in, rsa_msgsize, &test_yarrow); + + len = sizeof(out); + len2 = rsa_msgsize; + + /* encrypt */ + DO(rsa_v15_encrypt_key(in, rsa_msgsize, out, &len, &test_yarrow, prng_idx, &key)); + DO(rsa_v15_decrypt_key(out, len, tmp, rsa_msgsize, &test_yarrow, prng_idx, &stat, &key)); + if (stat != 1 || memcmp(tmp, in, rsa_msgsize)) { + printf("PKCS #1 v1.5 encrypt/decrypt failure (rsa_msgsize: %lu, stat: %d)\n", rsa_msgsize, stat); + return 1; + } + } + + /* signature */ + len = sizeof(out); + DO(rsa_v15_sign_hash(in, 20, out, &len, &test_yarrow, prng_idx, hash_idx, &key)); + in[1] ^= 1; + DO(rsa_v15_verify_hash(out, len, in, 20, &test_yarrow, prng_idx, hash_idx, &stat, &key)); + in[1] ^= 1; + DO(rsa_v15_verify_hash(out, len, in, 20, &test_yarrow, prng_idx, hash_idx, &stat2, &key)); + if (!(stat == 0 && stat2 == 1)) { + printf("PKCS #1 v1.5 sign/verify failure (stat %d, stat2 %d)\n", stat, stat2); + return 1; + } + /* encrypt the key (without lparam) */ for (rsa_msgsize = 1; rsa_msgsize <= 86; rsa_msgsize++) { /* make a random key/msg */ @@ -47,7 +76,7 @@ int rsa_test(void) return 1; } if (len2 != rsa_msgsize || memcmp(tmp, in, rsa_msgsize)) { - int x; + unsigned long x; printf("\nrsa_decrypt_key mismatch, len %lu (second decrypt)\n", len2); printf("Original contents: \n"); for (x = 0; x < 
rsa_msgsize; ) { diff --git a/demos/tv_gen.c b/demos/tv_gen.c index cf75d3b..17f7ad2 100644 --- a/demos/tv_gen.c +++ b/demos/tv_gen.c @@ -86,22 +86,33 @@ void reg_algs(void) void hash_gen(void) { - unsigned char md[MAXBLOCKSIZE], buf[MAXBLOCKSIZE*2+2]; + unsigned char md[MAXBLOCKSIZE], *buf; unsigned long outlen, x, y, z; FILE *out; + int err; out = fopen("hash_tv.txt", "w"); + if (out == NULL) { + perror("can't open hash_tv"); + } fprintf(out, "Hash Test Vectors:\n\nThese are the hashes of nn bytes '00 01 02 03 .. (nn-1)'\n\n"); for (x = 0; hash_descriptor[x].name != NULL; x++) { + buf = XMALLOC(2 * hash_descriptor[x].blocksize); + if (buf == NULL) { + perror("can't alloc mem"); + exit(EXIT_FAILURE); + } fprintf(out, "Hash: %s\n", hash_descriptor[x].name); - for (y = 0; y <= (hash_descriptor[x].blocksize * 2); y++) { for (z = 0; z < y; z++) { buf[z] = (unsigned char)(z & 255); } outlen = sizeof(md); - hash_memory(x, buf, y, md, &outlen); + if ((err = hash_memory(x, buf, y, md, &outlen)) != CRYPT_OK) { + printf("hash_memory error: %s\n", error_to_string(err)); + exit(EXIT_FAILURE); + } fprintf(out, "%3lu: ", y); for (z = 0; z < outlen; z++) { fprintf(out, "%02X", md[z]); @@ -109,15 +120,16 @@ void hash_gen(void) fprintf(out, "\n"); } fprintf(out, "\n"); + XFREE(buf); } fclose(out); } void cipher_gen(void) { - unsigned char key[MAXBLOCKSIZE], pt[MAXBLOCKSIZE]; + unsigned char *key, pt[MAXBLOCKSIZE]; unsigned long x, y, z, w; - int kl, lastkl; + int err, kl, lastkl; FILE *out; symmetric_key skey; @@ -138,15 +150,27 @@ void cipher_gen(void) case 1: kl = (cipher_descriptor[x].min_key_length + cipher_descriptor[x].max_key_length)/2; break; case 2: kl = cipher_descriptor[x].max_key_length; break; } - cipher_descriptor[x].keysize(&kl); + if ((err = cipher_descriptor[x].keysize(&kl)) != CRYPT_OK) { + printf("keysize error: %s\n", error_to_string(err)); + exit(EXIT_FAILURE); + } if (kl == lastkl) break; lastkl = kl; fprintf(out, "Key Size: %d bytes\n", kl); + key = 
XMALLOC(kl); + if (key == NULL) { + perror("can't malloc memory"); + exit(EXIT_FAILURE); + } + for (z = 0; (int)z < kl; z++) { key[z] = (unsigned char)z; } - cipher_descriptor[x].setup(key, kl, 0, &skey); + if ((err = cipher_descriptor[x].setup(key, kl, 0, &skey)) != CRYPT_OK) { + printf("setup error: %s\n", error_to_string(err)); + exit(EXIT_FAILURE); + } for (z = 0; (int)z < cipher_descriptor[x].block_length; z++) { pt[z] = (unsigned char)z; @@ -163,9 +187,13 @@ void cipher_gen(void) for (z = 0; z < (unsigned long)kl; z++) { key[z] = pt[z % cipher_descriptor[x].block_length]; } - cipher_descriptor[x].setup(key, kl, 0, &skey); + if ((err = cipher_descriptor[x].setup(key, kl, 0, &skey)) != CRYPT_OK) { + printf("cipher setup2 error: %s\n", error_to_string(err)); + exit(EXIT_FAILURE); + } } fprintf(out, "\n"); + XFREE(key); } fprintf(out, "\n"); } @@ -174,7 +202,7 @@ void cipher_gen(void) void hmac_gen(void) { - unsigned char key[MAXBLOCKSIZE], output[MAXBLOCKSIZE], input[MAXBLOCKSIZE*2+2]; + unsigned char key[MAXBLOCKSIZE], output[MAXBLOCKSIZE], *input; int x, y, z, kl, err; FILE *out; unsigned long len; @@ -193,6 +221,12 @@ void hmac_gen(void) for (y = 0; y < (int)hash_descriptor[x].hashsize; y++) { key[y] = (y&255); } + + input = XMALLOC(hash_descriptor[x].blocksize * 2); + if (input == NULL) { + perror("Can't malloc memory"); + exit(EXIT_FAILURE); + } for (y = 0; y <= (int)(hash_descriptor[x].blocksize * 2); y++) { for (z = 0; z < y; z++) { @@ -212,6 +246,7 @@ void hmac_gen(void) /* forward the key */ memcpy(key, output, hash_descriptor[x].hashsize); } + XFREE(input); fprintf(out, "\n"); } fclose(out); diff --git a/demos/x86_prof.c b/demos/x86_prof.c index e968554..a478614 100644 --- a/demos/x86_prof.c +++ b/demos/x86_prof.c @@ -45,9 +45,6 @@ void tally_results(int type) } } - - - /* RDTSC from Scott Duplichan */ static ulong64 rdtsc (void) { @@ -195,6 +192,9 @@ void reg_algs(void) #endif register_prng(&yarrow_desc); +register_prng(&fortuna_desc); 
+register_prng(&rc4_desc); + rng_make_prng(128, find_prng("yarrow"), &prng, NULL); } @@ -342,6 +342,101 @@ int time_hash(void) return 0; } +void time_mult(void) +{ + ulong64 t1, t2; + unsigned long x, y; + mp_int a, b, c; + + printf("Timing Multiplying:\n"); + mp_init_multi(&a,&b,&c,NULL); + for (x = 128/DIGIT_BIT; x <= 1024/DIGIT_BIT; x += 128/DIGIT_BIT) { + mp_rand(&a, x); + mp_rand(&b, x); + +#define DO1 mp_mul(&a, &b, &c); +#define DO2 DO1; DO1; + + t2 = -1; + for (y = 0; y < TIMES; y++) { + t_start(); + t1 = t_read(); + DO2; + t1 = (t_read() - t1)>>1; + if (t1 < t2) t2 = t1; + } + printf("%3lu digits: %9llu cycles\n", x, t2); /* x is unsigned long, so %lu rather than %d */ + } + mp_clear_multi(&a,&b,&c,NULL); + +#undef DO1 +#undef DO2 +} + +void time_sqr(void) +{ + ulong64 t1, t2; + unsigned long x, y; + mp_int a, b; + + printf("Timing Squaring:\n"); + mp_init_multi(&a,&b,NULL); + for (x = 128/DIGIT_BIT; x <= 1024/DIGIT_BIT; x += 128/DIGIT_BIT) { + mp_rand(&a, x); + +#define DO1 mp_sqr(&a, &b); +#define DO2 DO1; DO1; + + t2 = -1; + for (y = 0; y < TIMES; y++) { + t_start(); + t1 = t_read(); + DO2; + t1 = (t_read() - t1)>>1; + if (t1 < t2) t2 = t1; + } + printf("%3lu digits: %9llu cycles\n", x, t2); /* x is unsigned long, so %lu rather than %d */ + } + mp_clear_multi(&a,&b,NULL); + +#undef DO1 +#undef DO2 +} + +void time_prng(void) +{ + ulong64 t1, t2; + unsigned char buf[4096]; + prng_state prng; + unsigned long x, y; + + printf("Timing PRNGs:\n"); + for (x = 0; prng_descriptor[x].name != NULL; x++) { + prng_descriptor[x].start(&prng); + zeromem(buf, 256); + prng_descriptor[x].add_entropy(buf, 32, &prng); /* fortuna_add_entropy() rejects more than 32 bytes per call */ + prng_descriptor[x].ready(&prng); + t2 = -1; + +#define DO1 prng_descriptor[x].read(buf, 4096, &prng); +#define DO2 DO1 DO1 + + for (y = 0; y < 10000; y++) { + t_start(); + t1 = t_read(); + DO2; + t1 = (t_read() - t1)>>1; + if (t1 < t2) t2 = t1; + } + printf("%20s: %llu\n", prng_descriptor[x].name, t2>>12); + } +#undef DO2 +#undef DO1 + +} + + + int main(void) { reg_algs(); @@ -349,6 +444,9 @@ int main(void) printf("Timings for ciphers and hashes. 
Times are listed as cycles per byte processed.\n\n"); // init_timer(); + time_mult(); + time_sqr(); + time_prng(); time_cipher(); time_keysched(); time_hash(); diff --git a/doc/crypt.pdf b/doc/crypt.pdf index c8f5a95..b7555f5 100644 Binary files a/doc/crypt.pdf and b/doc/crypt.pdf differ diff --git a/examples/ch1-01.c b/examples/ch1-01.c deleted file mode 100644 index 010ccd7..0000000 --- a/examples/ch1-01.c +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Name : ch1-01.c - * Purpose : Demonstration of a basic libtomcrypt program - * Author : Tom St Denis - * - * History : v0.79 Initial release - */ - -/* ch1-01-1 */ -/* Include the default headers and libtomcrypt headers */ -#include - -int main(void) -{ - return 0; -} -/* ch1-01-1 */ - diff --git a/examples/ch1-02.c b/examples/ch1-02.c deleted file mode 100644 index 9d41f21..0000000 --- a/examples/ch1-02.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Name : ch1-02.c - * Purpose : Demonstration of error handling - * Author : Tom St Denis - * - * History : v0.79 Initial release - */ - -/* ch1-01-1 */ -#include - -int main(void) -{ - int errno; - - if ((errno = some_func(...)) != CRYPT_OK) { - printf("Error: %s\n", error_to_string(errno)); - return EXIT_FAILURE; - } - - return 0; -} -/*ch1-01-1 */ - - diff --git a/examples/ch1-03.c b/examples/ch1-03.c deleted file mode 100644 index c749aa1..0000000 --- a/examples/ch1-03.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Name : ch1-03.c - * Purpose : Demonstration of variable length outputs - * Author : Tom St Denis - * - * History : v0.79 Initial release - */ - - /* ch1-01-1 */ - #include - - int main(void) - { - unsigned long length; - unsigned char buffer[512]; - int errno; - - length = sizeof(buffer); - if ((errno = some_func(..., buffer, &length)) != CRYPT_OK) { - printf("Error: %s\n", error_to_string(errno)); - return EXIT_FAILURE; - } - printf("Size of output is %lu bytes\n", length); - return 0; -} -/* ch1-01-1 */ - - - \ No newline at end of file diff --git a/examples/ch2-01.c 
b/examples/ch2-01.c deleted file mode 100644 index b565479..0000000 --- a/examples/ch2-01.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Name : ch2-01.c - * Purpose : Demonstration of reading the RNG - * Author : Tom St Denis - * - * History : v0.81 Initial release - */ - - /* ch2-02-2 */ - #include - - int main(void) - { - unsigned char buf[16]; - unsigned long len; - int ix; - - /* read the RNG */ - len = rng_get_bytes(buf, sizeof(buf), NULL); - - /* verify return */ - if (len != sizeof(buf)) { - printf("Error: Only read %lu bytes.\n", len); - } else { - printf("Read %lu bytes\n", len); - for (ix = 0; ix < sizeof(buf); ix++) { - printf("%02x ", buf[ix]); - } - printf("\n"); - } - - return EXIT_SUCCESS; -} -/* ch2-02-2 */ - diff --git a/fortuna.c b/fortuna.c new file mode 100644 index 0000000..32cfa08 --- /dev/null +++ b/fortuna.c @@ -0,0 +1,256 @@ +/* LibTomCrypt, modular cryptographic library -- Tom St Denis + * + * LibTomCrypt is a library that provides various cryptographic + * algorithms in a highly modular and flexible manner. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://libtomcrypt.org + */ + +/* Implementation of Fortuna by Tom St Denis + +We deviate slightly here for reasons of simplicity [and to fit in the API]. First all "sources" +in the AddEntropy function are fixed to 0. 
Second since no reliable timer is provided +we reseed automatically when len(pool0) >= 64 or every FORTUNA_WD calls to the read function */ + +#include "mycrypt.h" + +#ifdef FORTUNA + +const struct _prng_descriptor fortuna_desc = { + "fortuna", + &fortuna_start, + &fortuna_add_entropy, + &fortuna_ready, + &fortuna_read, + &fortuna_done, + &fortuna_export, + &fortuna_import + +}; + +/* update the IV */ +static void fortuna_update_iv(prng_state *prng) +{ + int x; + unsigned char *IV; + /* update IV */ + IV = prng->fortuna.IV; + for (x = 0; x < 16; x++) { + IV[x] = (IV[x] + 1) & 255; + if (IV[x] != 0) break; + } +} + +/* reseed the PRNG */ +static int fortuna_reseed(prng_state *prng) +{ + unsigned char tmp[32]; + hash_state md; + int err, x; + + ++prng->fortuna.reset_cnt; + + /* new K == SHA256(K || s) where s == SHA256(P0) || SHA256(P1) ... */ + sha256_init(&md); + if ((err = sha256_process(&md, prng->fortuna.K, 32)) != CRYPT_OK) { + return err; + } + + for (x = 0; x < 32; x++) { + if (x == 0 || ((prng->fortuna.reset_cnt >> (x-1)) & 1) == 0) { + /* terminate this hash */ + if ((err = sha256_done(&prng->fortuna.pool[x], tmp)) != CRYPT_OK) { + return err; + } + /* add it to the string */ + if ((err = sha256_process(&md, tmp, 32)) != CRYPT_OK) { + return err; + } + /* reset this pool */ + sha256_init(&prng->fortuna.pool[x]); + } else { + break; + } + } + + /* finish key */ + if ((err = sha256_done(&md, prng->fortuna.K)) != CRYPT_OK) { + return err; + } + if ((err = rijndael_setup(prng->fortuna.K, 32, 0, &prng->fortuna.skey)) != CRYPT_OK) { + return err; + } + fortuna_update_iv(prng); + + /* reset pool len */ + prng->fortuna.pool0_len = 0; + prng->fortuna.wd = 0; + + +#ifdef CLEAN_STACK + zeromem(&md, sizeof(md)); + zeromem(tmp, sizeof(tmp)); +#endif + + return CRYPT_OK; +} + +int fortuna_start(prng_state *prng) +{ + int err, x; + + _ARGCHK(prng != NULL); + + /* initialize the pools */ + for (x = 0; x < 32; x++) { + sha256_init(&prng->fortuna.pool[x]); + } + 
prng->fortuna.pool_idx = prng->fortuna.pool0_len = prng->fortuna.reset_cnt = + prng->fortuna.wd = 0; + + /* reset bufs */ + zeromem(prng->fortuna.K, 32); + if ((err = rijndael_setup(prng->fortuna.K, 32, 0, &prng->fortuna.skey)) != CRYPT_OK) { + return err; + } + zeromem(prng->fortuna.IV, 16); + + return CRYPT_OK; +} + +int fortuna_add_entropy(const unsigned char *buf, unsigned long len, prng_state *prng) +{ + unsigned char tmp[2]; + int err; + + _ARGCHK(buf != NULL); + _ARGCHK(prng != NULL); + + /* ensure len <= 32 */ + if (len > 32) { + return CRYPT_INVALID_ARG; + } + + /* add s || length(buf) || buf to pool[pool_idx] */ + tmp[0] = 0; + tmp[1] = len; + if ((err = sha256_process(&prng->fortuna.pool[prng->fortuna.pool_idx], tmp, 2)) != CRYPT_OK) { + return err; + } + if ((err = sha256_process(&prng->fortuna.pool[prng->fortuna.pool_idx], buf, len)) != CRYPT_OK) { + return err; + } + if (prng->fortuna.pool_idx == 0) { + prng->fortuna.pool0_len += len + 2; + } + prng->fortuna.pool_idx = (prng->fortuna.pool_idx + 1) & 31; + + return CRYPT_OK; +} + +int fortuna_ready(prng_state *prng) +{ + return fortuna_reseed(prng); +} + +unsigned long fortuna_read(unsigned char *dst, unsigned long len, prng_state *prng) +{ + unsigned char tmp[16]; + int err; + unsigned long tlen, n; + + _ARGCHK(dst != NULL); + _ARGCHK(prng != NULL); + + /* do we have to reseed? 
*/ + if (++prng->fortuna.wd == FORTUNA_WD || prng->fortuna.pool0_len >= 64) { + if ((err = fortuna_reseed(prng)) != CRYPT_OK) { + return 0; + } + } + + /* now generate the blocks required */ + tlen = len; + while (len > 0) { + if (len >= 16) { + /* encrypt the IV and store it */ + rijndael_ecb_encrypt(prng->fortuna.IV, dst, &prng->fortuna.skey); + dst += 16; + len -= 16; + } else { + rijndael_ecb_encrypt(prng->fortuna.IV, tmp, &prng->fortuna.skey); + XMEMCPY(dst, tmp, len); + len = 0; + } + fortuna_update_iv(prng); + } + + /* generate new key */ + rijndael_ecb_encrypt(prng->fortuna.IV, prng->fortuna.K , &prng->fortuna.skey); fortuna_update_iv(prng); + rijndael_ecb_encrypt(prng->fortuna.IV, prng->fortuna.K+16, &prng->fortuna.skey); fortuna_update_iv(prng); + if ((err = rijndael_setup(prng->fortuna.K, 32, 0, &prng->fortuna.skey)) != CRYPT_OK) { + return 0; + } + +#ifdef CLEAN_STACK + zeromem(tmp, sizeof(tmp)); +#endif + return tlen; +} + +void fortuna_done(prng_state *prng) +{ + _ARGCHK(prng != NULL); + /* call cipher done when we invent one ;-) */ +} + +int fortuna_export(unsigned char *out, unsigned long *outlen, prng_state *prng) +{ + int x; + + _ARGCHK(out != NULL); + _ARGCHK(outlen != NULL); + _ARGCHK(prng != NULL); + + /* we'll write 2048 bytes for s&g's */ + if (*outlen < 2048) { + return CRYPT_BUFFER_OVERFLOW; + } + + for (x = 0; x < 32; x++) { + if (fortuna_read(out+x*64, 64, prng) != 64) { + return CRYPT_ERROR_READPRNG; + } + } + *outlen = 2048; + + return CRYPT_OK; +} + +int fortuna_import(const unsigned char *in, unsigned long inlen, prng_state *prng) +{ + int err, x; + + _ARGCHK(in != NULL); + _ARGCHK(prng != NULL); + + if (inlen != 2048) { + return CRYPT_INVALID_ARG; + } + + if ((err = fortuna_start(prng)) != CRYPT_OK) { + return err; + } + for (x = 0; x < 64; x++) { /* 2048 bytes fed as 64 chunks of 32: fortuna_add_entropy() rejects len > 32 */ + if ((err = fortuna_add_entropy(in+x*32, 32, prng)) != CRYPT_OK) { /* prng is already a prng_state pointer; do not take its address */ + return err; + } + } + return fortuna_ready(prng); +} + +#endif + diff --git a/hmac_done.c b/hmac_done.c index 
c308cc3..7ded6a1 100644 --- a/hmac_done.c +++ b/hmac_done.c @@ -94,6 +94,7 @@ int hmac_done(hmac_state *hmac, unsigned char *hashOut, unsigned long *outlen) err = CRYPT_OK; __ERR: + XFREE(hmac->key); #ifdef CLEAN_STACK zeromem(isha, hashsize); zeromem(buf, hashsize); diff --git a/hmac_init.c b/hmac_init.c index 076b529..2cbf001 100644 --- a/hmac_init.c +++ b/hmac_init.c @@ -61,9 +61,16 @@ int hmac_init(hmac_state *hmac, int hash, const unsigned char *key, unsigned lon return CRYPT_MEM; } + /* allocate memory for key */ + hmac->key = XMALLOC(HMAC_BLOCKSIZE); + if (hmac->key == NULL) { + XFREE(buf); + return CRYPT_MEM; + } + // (1) make sure we have a large enough key if(keylen > HMAC_BLOCKSIZE) { - z = (unsigned long)sizeof(hmac->key); + z = (unsigned long)HMAC_BLOCKSIZE; if ((err = hash_memory(hash, key, keylen, hmac->key, &z)) != CRYPT_OK) { goto __ERR; } diff --git a/hmac_test.c b/hmac_test.c index 4ec7d94..c0f2185 100644 --- a/hmac_test.c +++ b/hmac_test.c @@ -285,7 +285,7 @@ Key First" outlen = sizeof(digest); if((err = hmac_memory(hash, cases[i].key, cases[i].keylen, cases[i].data, cases[i].datalen, digest, &outlen)) != CRYPT_OK) { #if 0 - printf("HMAC-%s test #%d\n", cases[i].algo, cases[i].num); + printf("HMAC-%s test #%d, %s\n", cases[i].algo, cases[i].num, error_to_string(err)); #endif return err; } diff --git a/ltc_tommath.h b/ltc_tommath.h index a53c973..3276141 100644 --- a/ltc_tommath.h +++ b/ltc_tommath.h @@ -1,3 +1,4 @@ + /* LibTomMath, multiple-precision integer library -- Tom St Denis * * LibTomMath is a library that provides multiple-precision @@ -20,6 +21,7 @@ #include #include #include +#include #undef MIN #define MIN(x,y) ((x)<(y)?(x):(y)) @@ -147,7 +149,7 @@ extern "C" { typedef int mp_err; /* you'll have to tune these... 
*/ - int KARATSUBA_MUL_CUTOFF, +extern int KARATSUBA_MUL_CUTOFF, KARATSUBA_SQR_CUTOFF, TOOM_MUL_CUTOFF, TOOM_SQR_CUTOFF; @@ -552,6 +554,7 @@ void bn_reverse(unsigned char *s, int len); const char *mp_s_rmap; + #ifdef __cplusplus } #endif diff --git a/makefile b/makefile index 6b9a5da..b9e810f 100644 --- a/makefile +++ b/makefile @@ -4,7 +4,7 @@ # Modified by Clay Culver # The version -VERSION=0.97a +VERSION=0.97b # Compiler and Linker Names #CC=gcc @@ -63,7 +63,7 @@ crypt_find_cipher_id.o crypt_find_prng.o crypt_prng_is_valid.o \ crypt_unregister_cipher.o crypt_cipher_is_valid.o crypt_find_hash.o \ crypt_hash_descriptor.o crypt_register_cipher.o crypt_unregister_hash.o \ \ -sprng.o yarrow.o rc4.o rng_get_bytes.o rng_make_prng.o \ +fortuna.o sprng.o yarrow.o rc4.o rng_get_bytes.o rng_make_prng.o \ \ rand_prime.o is_prime.o \ \ @@ -71,6 +71,7 @@ ecc.o dh.o \ \ rsa_decrypt_key.o rsa_encrypt_key.o rsa_exptmod.o rsa_free.o rsa_make_key.o \ rsa_sign_hash.o rsa_verify_hash.o rsa_export.o rsa_import.o tim_exptmod.o \ +rsa_v15_encrypt_key.o rsa_v15_decrypt_key.o rsa_v15_sign_hash.o rsa_v15_verify_hash.o \ \ dsa_export.o dsa_free.o dsa_import.o dsa_make_key.o dsa_sign_hash.o \ dsa_verify_hash.o dsa_verify_key.o \ diff --git a/makefile.cygwin_dll b/makefile.cygwin_dll index 287677d..0dece2f 100644 --- a/makefile.cygwin_dll +++ b/makefile.cygwin_dll @@ -28,7 +28,7 @@ crypt_find_cipher_id.o crypt_find_prng.o crypt_prng_is_valid.o \ crypt_unregister_cipher.o crypt_cipher_is_valid.o crypt_find_hash.o \ crypt_hash_descriptor.o crypt_register_cipher.o crypt_unregister_hash.o \ \ -sprng.o yarrow.o rc4.o rng_get_bytes.o rng_make_prng.o \ +sprng.o fortuna.o yarrow.o rc4.o rng_get_bytes.o rng_make_prng.o \ \ rand_prime.o is_prime.o \ \ @@ -36,6 +36,7 @@ ecc.o dh.o \ \ rsa_decrypt_key.o rsa_encrypt_key.o rsa_exptmod.o rsa_free.o rsa_make_key.o \ rsa_sign_hash.o rsa_verify_hash.o rsa_export.o rsa_import.o tim_exptmod.o \ +rsa_v15_encrypt_key.o rsa_v15_decrypt_key.o rsa_v15_sign_hash.o 
rsa_v15_verify_hash.o \ \ dsa_export.o dsa_free.o dsa_import.o dsa_make_key.o dsa_sign_hash.o \ dsa_verify_hash.o dsa_verify_key.o \ diff --git a/makefile.icc b/makefile.icc index e49cb66..4019e2f 100644 --- a/makefile.icc +++ b/makefile.icc @@ -41,7 +41,7 @@ default:library # B - Blend of P4 and PM [mobile] # # Default to just generic max opts -CFLAGS += -O3 -xN -ip +CFLAGS += -O3 -xN -ip # want to see stuff? #CFLAGS += -opt_report @@ -79,7 +79,7 @@ crypt_find_cipher_id.o crypt_find_prng.o crypt_prng_is_valid.o \ crypt_unregister_cipher.o crypt_cipher_is_valid.o crypt_find_hash.o \ crypt_hash_descriptor.o crypt_register_cipher.o crypt_unregister_hash.o \ \ -sprng.o yarrow.o rc4.o rng_get_bytes.o rng_make_prng.o \ +sprng.o fortuna.o yarrow.o rc4.o rng_get_bytes.o rng_make_prng.o \ \ rand_prime.o is_prime.o \ \ @@ -87,6 +87,7 @@ ecc.o dh.o \ \ rsa_decrypt_key.o rsa_encrypt_key.o rsa_exptmod.o rsa_free.o rsa_make_key.o \ rsa_sign_hash.o rsa_verify_hash.o rsa_export.o rsa_import.o tim_exptmod.o \ +rsa_v15_encrypt_key.o rsa_v15_decrypt_key.o rsa_v15_sign_hash.o rsa_v15_verify_hash.o \ \ dsa_export.o dsa_free.o dsa_import.o dsa_make_key.o dsa_sign_hash.o \ dsa_verify_hash.o dsa_verify_key.o \ diff --git a/makefile.msvc b/makefile.msvc index aa8a9bc..6fd60ef 100644 --- a/makefile.msvc +++ b/makefile.msvc @@ -18,7 +18,7 @@ crypt_find_cipher_id.obj crypt_find_prng.obj crypt_prng_is_valid.obj crypt_unregister_cipher.obj crypt_cipher_is_valid.obj crypt_find_hash.obj \ crypt_hash_descriptor.obj crypt_register_cipher.obj crypt_unregister_hash.obj \ \ -sprng.obj yarrow.obj rc4.obj rng_get_bytes.obj rng_make_prng.obj \ +sprng.obj fortuna.obj yarrow.obj rc4.obj rng_get_bytes.obj rng_make_prng.obj \ \ rand_prime.obj is_prime.obj \ \ @@ -26,6 +26,7 @@ ecc.obj dh.obj \ \ rsa_decrypt_key.obj rsa_encrypt_key.obj rsa_exptmod.obj rsa_free.obj rsa_make_key.obj \ rsa_sign_hash.obj rsa_verify_hash.obj rsa_export.obj rsa_import.obj tim_exptmod.obj \ +rsa_v15_encrypt_key.obj 
rsa_v15_decrypt_key.obj rsa_v15_sign_hash.obj rsa_v15_verify_hash.obj \ \ dsa_export.obj dsa_free.obj dsa_import.obj dsa_make_key.obj dsa_sign_hash.obj \ dsa_verify_hash.obj dsa_verify_key.obj \ diff --git a/mpi.c b/mpi.c index d3d9918..ae120d8 100644 --- a/mpi.c +++ b/mpi.c @@ -258,6 +258,15 @@ fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) /* now we proceed to zero successive digits * from the least significant upwards */ +#ifdef LTMSSE + // compute globals we'd like to have in MMX registers + asm ("movl $268435455,%%eax \n\t" //mm2 == MP_MASK + "movd %%eax,%%mm2 \n\t" + "movd %0,%%mm3 \n\t" //mm3 = rho + "movq (%1),%%mm0 \n\t" // W[ix] for ix=0 + ::"r"(rho),"r"(W):"%eax"); +#endif + for (ix = 0; ix < n->used; ix++) { /* mu = ai * m' mod b * @@ -265,9 +274,13 @@ fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) * by casting the value down to a mp_digit. Note this requires * that W[ix-1] have the carry cleared (see after the inner loop) */ +#ifndef LTMSSE register mp_digit mu; mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK); - +#else + asm("pmuludq %mm3,%mm0 \n\t" // multiply against rho + "pand %mm2,%mm0 \n\t"); // mu == mm0 +#endif /* a = a + mu * m * b**i * * This is computed in place and on the fly. 
The multiplication @@ -295,13 +308,33 @@ fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) /* inner loop */ for (iy = 0; iy < n->used; iy++) { +#ifndef LTMSSE *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++); +#else +// SSE version + asm ("movd (%0), %%mm1 \n\t" // load right side + "pmuludq %%mm0,%%mm1 \n\t" // multiply into left side + "paddq (%1),%%mm1 \n\t" // add 64-bit result out + "movq %%mm1,(%1)" // store result + :: "r"(tmpn), "r"(_W)); + // update pointers + ++tmpn; + ++_W; +#endif } } /* now fix carry for next digit, W[ix+1] */ +#ifndef LTMSSE W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT); - } +#else + asm("movq (%0),%%mm0 \n\t" // W[ix] + "psrlq $28,%%mm0 \n\t" // W[ix]>>28 + "paddq 8(%0),%%mm0 \n\t" // W[ix+1] + W[ix]>>28 + "movq %%mm0,8(%0) " // store + ::"r"(&W[ix])); +#endif +} /* now we have to propagate the carries and * shift the words downward [all those least @@ -319,35 +352,36 @@ fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) /* alias for next word, where the carry goes */ _W = W + ++ix; - for (; ix <= n->used * 2 + 1; ix++) { - *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); - } - - /* copy out, A = A/b**n - * - * The result is A/b**n but instead of converting from an - * array of mp_word to mp_digit than calling mp_rshd - * we just copy them in the right order - */ - /* alias for destination word */ tmpx = x->dp; - /* alias for shifted double precision result */ - _W = W + n->used; - - for (ix = 0; ix < n->used + 1; ix++) { - *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK)); + for (; ix <= n->used * 2 + 1; ix++) { +#ifndef LTMSSE + *tmpx++ = (mp_digit)(*_W1 & ((mp_word) MP_MASK)); + *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); +#else + asm("movq %%mm0,%%mm1 \n\t" // copy of W[ix] + "psrlq $28,%%mm0 \n\t" // >>28 + "pand %%mm2,%%mm1 \n\t" // & with MP_MASK + "paddq (%0),%%mm0 \n\t" // += _W + "movd %%mm1,(%1) \n\t" // store it + ::"r"(_W),"r"(tmpx)); + ++_W; ++tmpx; +#endif } /* zero oldused digits, if the input a was 
larger than * m->used+1 we'll have to clear the digits */ - for (; ix < olduse; ix++) { + for (ix = n->used + 1; ix < olduse; ix++) { *tmpx++ = 0; } } +#ifdef LTMSSE + asm("emms"); +#endif + /* set the max used and clamp */ x->used = n->used + 1; mp_clamp (x); @@ -408,7 +442,7 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) } /* clear temp buf (the columns) */ - XMEMSET (W, 0, sizeof (mp_word) * digs); + memset (W, 0, sizeof (mp_word) * digs); /* calculate the columns */ pa = a->used; @@ -423,13 +457,21 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) * the loop without scheduling problems */ { - register mp_digit tmpx, *tmpy; +#ifndef LTMSSE + register mp_digit tmpx; +#endif + + register mp_digit *tmpy; register mp_word *_W; register int iy, pb; /* alias for the the word on the left e.g. A[ix] * A[iy] */ +#ifndef LTMSSE tmpx = a->dp[ix]; - +#else +// SSE: now we load the left side in mm0 + asm (" movd %0, %%mm0 " :: "r"(a->dp[ix])); +#endif /* alias for the right side */ tmpy = b->dp; @@ -445,7 +487,19 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) pb = MIN (b->used, digs - ix); for (iy = 0; iy < pb; iy++) { +#ifndef LTMSSE *_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++); +#else +// SSE version + asm ("movd (%0), %%mm1 \n\t" // load right side + "pmuludq %%mm0,%%mm1 \n\t" // multiply into left side + "paddq (%1), %%mm1 \n\t" // add 64-bit result out + "movq %%mm1,(%1)" // store result + :: "r"(tmpy), "r"(_W)); + // update pointers + ++tmpy; + ++_W; +#endif } } @@ -474,21 +528,56 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) * last digit to copy */ tmpc = c->dp; + +#ifdef LTMSSE + // mm2 has W[ix-1] + asm("movq (%0),%%mm2"::"r"(W)); +#endif + for (ix = 1; ix < digs; ix++) { +#ifndef LTMSSE /* forward the carry from the previous temp */ W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT)); /* now extract the previous digit [below the carry] */ *tmpc++ = (mp_digit) (W[ix - 1] & ((mp_word) 
MP_MASK)); + +#else + asm( + "movq (%0),%%mm1 \n\t" // W[ix] + "movd %%mm2,%%eax \n\t" // get 32-bit version of it W[ix-1] + "psrlq $28,%%mm2 \n\t" // W[ix-1] >> DIGIT_BIT ... must be 28 + "andl $268435455,%%eax \n\t" // & with MP_MASK against W[ix-1] + "paddq %%mm1,%%mm2 \n\t" // add them + "movl %%eax,(%1) \n\t" // store it + :: "r"(&W[ix]), "r"(tmpc) : "%eax"); + ++tmpc; +#endif + } + +#ifndef LTMSSE /* fetch the last digit */ *tmpc++ = (mp_digit) (W[digs - 1] & ((mp_word) MP_MASK)); +#else + // get last since we don't store into W[ix] anymore ;-) + asm("movd %%mm2,%%eax \n\t" + "andl $268435455,%%eax \n\t" // & with MP_MASK against W[ix-1] + "movl %%eax,(%0)" // store it + ::"r"(tmpc):"%eax"); + ++tmpc; +#endif /* clear unused digits [that existed in the old copy of c] */ for (; ix < olduse; ix++) { *tmpc++ = 0; } } + +#ifdef LTMSSE + asm("emms"); +#endif + mp_clamp (c); return MP_OKAY; } @@ -538,10 +627,14 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) /* like the other comba method we compute the columns first */ pa = a->used; pb = b->used; - XMEMSET (W + digs, 0, (pa + pb + 1 - digs) * sizeof (mp_word)); + memset (W + digs, 0, (pa + pb + 1 - digs) * sizeof (mp_word)); for (ix = 0; ix < pa; ix++) { { - register mp_digit tmpx, *tmpy; +#ifndef LTMSSE + register mp_digit tmpx; +#endif + + register mp_digit *tmpy; register int iy; register mp_word *_W; @@ -549,7 +642,12 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) iy = digs - ix; /* copy of word on the left of A[ix] * B[iy] */ +#ifndef LTMSSE tmpx = a->dp[ix]; +#else +//SSE we load tmpx into mm0 + asm (" movd %0, %%mm0 " :: "r"(a->dp[ix])); +#endif /* alias for right side */ tmpy = b->dp + iy; @@ -569,8 +667,21 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) /* compute column products for digits above the minimum */ for (; iy < pb; iy++) { +#ifndef LTMSSE *_W++ += ((mp_word) tmpx) * ((mp_word)*tmpy++); +#else +// SSE version + asm 
("movd (%0), %%mm1 \n\t" // load right side + "pmuludq %%mm0,%%mm1 \n\t" // multiply into left side + "paddq (%1),%%mm1 \n\t" // add 64-bit result out + "movq %%mm1,(%1)" // store result + :: "r"(tmpy), "r"(_W)); + // update pointers + ++tmpy; + ++_W; +#endif } + } } @@ -582,15 +693,46 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) * * See comments in bn_fast_s_mp_mul_digs.c */ +#ifdef LTMSSE + // mm2 has W[ix-1] + asm("movq (%0),%%mm2"::"r"(W + digs)); +#endif + for (ix = digs + 1; ix < newused; ix++) { + /* forward the carry from the previous temp */ +#ifndef LTMSSE W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT)); c->dp[ix - 1] = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK)); +#else + asm( + "movd %%mm2,%%eax \n\t" // get 32-bit version of it W[ix-1] + "psrlq $28,%%mm2 \n\t" // W[ix-1] >> DIGIT_BIT ... must be 28 + "andl $268435455,%%eax \n\t" // & with MP_MASK against W[ix-1] + "paddq (%0),%%mm2 \n\t" // add them + "movl %%eax,(%1) \n\t" // store it + :: "r"(&W[ix]), "r"(&c->dp[ix-1]) : "%eax"); +#endif + } + +#ifndef LTMSSE c->dp[newused - 1] = (mp_digit) (W[newused - 1] & ((mp_word) MP_MASK)); +#else + // get last since we don't store into W[ix] anymore ;-) + asm("movd %%mm2,%%eax\n\t" + "andl $268435455,%%eax \n\t" // & with MP_MASK against W[ix-1] + "movl %%eax,(%0)" // store it + ::"r"(&(c->dp[newused-1])):"%eax"); +#endif for (; ix < oldused; ix++) { c->dp[ix] = 0; } + +#ifdef LTMSSE + asm("emms"); +#endif + mp_clamp (c); return MP_OKAY; } @@ -638,7 +780,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b) /* calculate size of product and allocate as required */ pa = a->used; - newused = pa + pa + 1; + newused = pa + pa; if (b->alloc < newused) { if ((res = mp_grow (b, newused)) != MP_OKAY) { return res; @@ -654,12 +796,15 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b) * the inner product can be doubled using n doublings instead of * n**2 */ - XMEMSET (W, 0, newused * sizeof (mp_word)); - XMEMSET (W2, 0, newused * sizeof (mp_word)); + 
memset (W, 0, newused * sizeof (mp_word)); +#ifndef LTMSSE + memset (W2, 0, newused * sizeof (mp_word)); +#endif /* This computes the inner product. To simplify the inner N**2 loop * the multiplication by two is done afterwards in the N loop. */ + for (ix = 0; ix < pa; ix++) { /* compute the outer product * @@ -668,15 +813,31 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b) * there is no need todo a double precision addition * into the W2[] array. */ +#ifndef LTMSSE W2[ix + ix] = ((mp_word)a->dp[ix]) * ((mp_word)a->dp[ix]); +#else + asm("movd %0,%%xmm0 \n\t" // load a->dp[ix] + "movdq2q %%xmm0,%%mm0 \n\t" // get 64-bit version + "pmuludq %%xmm0,%%xmm0 \n\t" // square it + "movdqu %%xmm0,(%1) \n\t" // store it (8-byte result, 8-byte zero) + ::"r"(a->dp[ix]), "r"(&(W2[ix+ix]))); +#endif { - register mp_digit tmpx, *tmpy; +#ifndef LTMSSE + register mp_digit tmpx; +#endif + register mp_digit *tmpy; register mp_word *_W; register int iy; /* copy of left side */ +#ifndef LTMSSE tmpx = a->dp[ix]; +#else +//SSE we load tmpx into mm0 [note: loaded above] +// asm (" movd %0, %%mm0 " :: "r"(a->dp[ix])); +#endif /* alias for right side */ tmpy = a->dp + (ix + 1); @@ -686,7 +847,19 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b) /* inner products */ for (iy = ix + 1; iy < pa; iy++) { +#ifndef LTMSSE *_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++); +#else +// SSE version + asm ("movd (%0), %%mm1 \n\t" // load right side + "pmuludq %%mm0,%%mm1 \n\t" // multiply into left side + "paddq (%1),%%mm1 \n\t" // add 64-bit result out + "movq %%mm1,(%1)" // store result + :: "r"(tmpy), "r"(_W)); + // update pointers + ++tmpy; + ++_W; +#endif } } } @@ -707,10 +880,19 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b) /* double first value, since the inner products are * half of what they should be */ - W[0] += W[0] + W2[0]; - tmpb = b->dp; +#ifndef LTMSSE + W[0] += W[0] + W2[0]; +#else + // mm2 has W[ix-1] + asm("movq (%0),%%mm2 \n\t" // load W[0] + "paddq %%mm2,%%mm2 \n\t" // W[0] + W[0] + "paddq 
(%1),%%mm2 \n\t" // W[0] + W[0] + W2[0] + ::"r"(W),"r"(W2)); +#endif + for (ix = 1; ix < newused; ix++) { +#ifndef LTMSSE /* double/add next digit */ W[ix] += W[ix] + W2[ix]; @@ -721,12 +903,34 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b) * needed */ *tmpb++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK)); +#else + asm( "movq (%0),%%mm0 \n\t" // load W[ix] + "movd %%mm2,%%eax \n\t" // 32-bit version of W[ix-1] + "paddq %%mm0,%%mm0 \n\t" // W[ix] + W[ix] + "psrlq $28,%%mm2 \n\t" // W[ix-1] >> DIGIT_BIT ... must be 28 + "paddq (%1),%%mm0 \n\t" // W[ix] + W[ix] + W2[ix] + "andl $268435455,%%eax \n\t" // & with MP_MASK against W[ix-1] + "paddq %%mm0,%%mm2 \n\t" // W[ix] + W[ix] + W2[ix] + W[ix-1]>>DIGIT_BIT + "movl %%eax,(%2) " // store it + :: "r"(&W[ix]), "r"(&W2[ix]), "r"(tmpb):"%eax"); + ++tmpb; +#endif } + +#ifndef LTMSSE /* set the last value. Note even if the carry is zero * this is required since the next step will not zero * it if b originally had a value at b->dp[2*a.used] */ *tmpb++ = (mp_digit) (W[(newused) - 1] & ((mp_word) MP_MASK)); +#else + // get last since we don't store into W[ix] anymore ;-) + asm("movd %%mm2,%%eax \n\t" + "andl $268435455,%%eax \n\t" // & with MP_MASK against W[ix-1] + "movl %%eax,(%0) " // store it + ::"r"(tmpb):"%eax"); + ++tmpb; +#endif /* clear high digits of b if there were any originally */ for (; ix < olduse; ix++) { @@ -734,6 +938,10 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b) } } +#ifdef LTMSSE + asm("emms"); +#endif + mp_clamp (b); return MP_OKAY; } @@ -1142,10 +1350,14 @@ mp_clamp (mp_int * a) void mp_clear (mp_int * a) { + int i; + /* only do anything if a hasn't been freed previously */ if (a->dp != NULL) { /* first zero the digits */ - XMEMSET (a->dp, 0, sizeof (mp_digit) * a->used); + for (i = 0; i < a->used; i++) { + a->dp[i] = 0; + } /* free ram */ XFREE(a->dp); @@ -3083,15 +3295,22 @@ int mp_grow (mp_int * a, int size) */ #include -/* init a new bigint */ +/* init a new mp_int */ int mp_init (mp_int * a) { + int 
i; + /* allocate memory required and clear it */ - a->dp = OPT_CAST(mp_digit) XCALLOC (sizeof (mp_digit), MP_PREC); + a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC); if (a->dp == NULL) { return MP_MEM; } + /* set the digits to zero */ + for (i = 0; i < MP_PREC; i++) { + a->dp[i] = 0; + } + /* set the used to zero, allocated digits to the default precision * and sign to positive */ a->used = 0; @@ -7538,7 +7757,7 @@ mp_zero (mp_int * a) { a->sign = MP_ZPOS; a->used = 0; - XMEMSET (a->dp, 0, sizeof (mp_digit) * a->alloc); + memset (a->dp, 0, sizeof (mp_digit) * a->alloc); } /* End: bn_mp_zero.c */ @@ -8396,6 +8615,7 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c) CPU /Compiler /MUL CUTOFF/SQR CUTOFF ------------------------------------------------------------- + Intel P4 Northwood /GCC v3.3.3 / 121/ 128/SSE patches ;-) Intel P4 Northwood /GCC v3.3.3 / 59/ 81/profiled build Intel P4 Northwood /GCC v3.3.3 / 59/ 80/profiled_single build Intel P4 Northwood /ICC v8.0 / 57/ 70/profiled build @@ -8404,8 +8624,8 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c) */ -int KARATSUBA_MUL_CUTOFF = 57, /* Min. number of digits before Karatsuba multiplication is used. */ - KARATSUBA_SQR_CUTOFF = 70, /* Min. number of digits before Karatsuba squaring is used. */ +int KARATSUBA_MUL_CUTOFF = 121, /* Min. number of digits before Karatsuba multiplication is used. */ + KARATSUBA_SQR_CUTOFF = 128, /* Min. number of digits before Karatsuba squaring is used. 
*/ TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */ TOOM_SQR_CUTOFF = 400; diff --git a/mycrypt.h b/mycrypt.h index 887bc43..43a93e9 100644 --- a/mycrypt.h +++ b/mycrypt.h @@ -16,8 +16,8 @@ extern "C" { #endif /* version */ -#define CRYPT 0x0097 -#define SCRYPT "0.97a" +#define CRYPT 0x0098 +#define SCRYPT "0.98" /* max size of either a cipher/hash block or symmetric key [largest of the two] */ #define MAXBLOCKSIZE 64 diff --git a/mycrypt_cfg.h b/mycrypt_cfg.h index 175c0ea..d24824e 100644 --- a/mycrypt_cfg.h +++ b/mycrypt_cfg.h @@ -8,10 +8,10 @@ #define MYCRYPT_CFG_H /* you can change how memory allocation works ... */ - void *XMALLOC(size_t n); - void *REALLOC(void *p, size_t n); - void *XCALLOC(size_t n, size_t s); - void XFREE(void *p); +void *XMALLOC(size_t n); +void *REALLOC(void *p, size_t n); +void *XCALLOC(size_t n, size_t s); +void XFREE(void *p); /* change the clock function too */ clock_t XCLOCK(void); diff --git a/mycrypt_custom.h b/mycrypt_custom.h index 980b799..1e00830 100644 --- a/mycrypt_custom.h +++ b/mycrypt_custom.h @@ -5,10 +5,6 @@ #ifndef MYCRYPT_CUSTOM_H_ #define MYCRYPT_CUSTOM_H_ -#ifdef CRYPT - #error mycrypt_custom.h should be included before mycrypt.h -#endif - /* macros for various libc functions you can change for embedded targets */ #define XMALLOC malloc #define XREALLOC realloc @@ -28,7 +24,7 @@ #define LTC_TEST /* clean the stack of functions which put private information on stack */ -//#define CLEAN_STACK +// #define CLEAN_STACK /* disable all file related functions */ //#define NO_FILE @@ -43,8 +39,8 @@ #define XTEA #define TWOFISH #define TWOFISH_TABLES -//#define TWOFISH_ALL_TABLES -//#define TWOFISH_SMALL +// #define TWOFISH_ALL_TABLES +// #define TWOFISH_SMALL #define DES #define CAST5 #define NOEKEON @@ -92,6 +88,13 @@ #define YARROW_AES 0 #define SPRNG #define RC4 + +/* Fortuna */ +#define FORTUNA +/* reseed every N calls to the read function */ +#define FORTUNA_WD 1024 + + #define 
DEVRANDOM #define TRY_URANDOM_FIRST @@ -133,11 +136,12 @@ /* Include the MPI functionality? (required by the PK algorithms) */ #define MPI +/* Use SSE2 optimizations in LTM? Requires GCC or ICC and a P4 or K8 processor */ +// #define LTMSSE + /* PKCS #1 and #5 stuff */ #define PKCS_1 #define PKCS_5 -#include - #endif diff --git a/mycrypt_hash.h b/mycrypt_hash.h index dc828a4..b661d12 100644 --- a/mycrypt_hash.h +++ b/mycrypt_hash.h @@ -276,7 +276,7 @@ typedef struct Hmac_state { hash_state md; int hash; hash_state hashstate; - unsigned char key[MAXBLOCKSIZE]; + unsigned char *key; } hmac_state; int hmac_init(hmac_state *hmac, int hash, const unsigned char *key, unsigned long keylen); diff --git a/mycrypt_pk.h b/mycrypt_pk.h index 148ead3..9afacda 100644 --- a/mycrypt_pk.h +++ b/mycrypt_pk.h @@ -113,6 +113,7 @@ typedef struct Rsa_key { void rsa_free(rsa_key *key); +/* These use PKCS #1 v2.0 padding */ int rsa_encrypt_key(const unsigned char *inkey, unsigned long inlen, unsigned char *outkey, unsigned long *outlen, const unsigned char *lparam, unsigned long lparamlen, @@ -137,6 +138,30 @@ int rsa_verify_hash(const unsigned char *sig, unsigned long siglen, int hash_idx, unsigned long saltlen, int *stat, rsa_key *key); +/* these use PKCS #1 v1.5 padding */ +int rsa_v15_encrypt_key(const unsigned char *inkey, unsigned long inlen, + unsigned char *outkey, unsigned long *outlen, + prng_state *prng, int prng_idx, + rsa_key *key); + +int rsa_v15_decrypt_key(const unsigned char *in, unsigned long inlen, + unsigned char *outkey, unsigned long keylen, + prng_state *prng, int prng_idx, + int *res, rsa_key *key); + +int rsa_v15_sign_hash(const unsigned char *msghash, unsigned long msghashlen, + unsigned char *sig, unsigned long *siglen, + prng_state *prng, int prng_idx, + int hash_idx, rsa_key *key); + +int rsa_v15_verify_hash(const unsigned char *sig, unsigned long siglen, + const unsigned char *msghash, unsigned long msghashlen, + prng_state *prng, int prng_idx, + int 
hash_idx, int *stat, + rsa_key *key); + + +/* PKCS #1 import/export */ int rsa_export(unsigned char *out, unsigned long *outlen, int type, rsa_key *key); int rsa_import(const unsigned char *in, unsigned long inlen, rsa_key *key); diff --git a/mycrypt_prng.h b/mycrypt_prng.h index 64d862f..3ded2cc 100644 --- a/mycrypt_prng.h +++ b/mycrypt_prng.h @@ -10,9 +10,25 @@ struct rc4_prng { unsigned char buf[256]; }; +struct fortuna_prng { + hash_state pool[32]; /* the 32 pools */ + + symmetric_key skey; + + unsigned char K[32], /* the current key */ + IV[16]; /* IV for CTR mode */ + + unsigned long pool_idx, /* current pool we will add to */ + pool0_len, /* length of 0'th pool */ + wd; + + ulong64 reset_cnt; /* number of times we have reset */ +}; + typedef union Prng_state { struct yarrow_prng yarrow; struct rc4_prng rc4; + struct fortuna_prng fortuna; } prng_state; extern struct _prng_descriptor { @@ -20,7 +36,10 @@ extern struct _prng_descriptor { int (*start)(prng_state *); int (*add_entropy)(const unsigned char *, unsigned long, prng_state *); int (*ready)(prng_state *); - unsigned long (*read)(unsigned char *, unsigned long len, prng_state *); + unsigned long (*read)(unsigned char *, unsigned long, prng_state *); + void (*done)(prng_state *); + int (*export)(unsigned char *, unsigned long *, prng_state *); + int (*import)(const unsigned char *, unsigned long, prng_state *); } prng_descriptor[]; #ifdef YARROW @@ -28,14 +47,31 @@ extern struct _prng_descriptor { int yarrow_add_entropy(const unsigned char *buf, unsigned long len, prng_state *prng); int yarrow_ready(prng_state *prng); unsigned long yarrow_read(unsigned char *buf, unsigned long len, prng_state *prng); + void yarrow_done(prng_state *prng); + int yarrow_export(unsigned char *out, unsigned long *outlen, prng_state *prng); + int yarrow_import(const unsigned char *in, unsigned long inlen, prng_state *prng); extern const struct _prng_descriptor yarrow_desc; #endif +#ifdef FORTUNA + int fortuna_start(prng_state 
*prng); + int fortuna_add_entropy(const unsigned char *buf, unsigned long len, prng_state *prng); + int fortuna_ready(prng_state *prng); + unsigned long fortuna_read(unsigned char *buf, unsigned long len, prng_state *prng); + void fortuna_done(prng_state *prng); + int fortuna_export(unsigned char *out, unsigned long *outlen, prng_state *prng); + int fortuna_import(const unsigned char *in, unsigned long inlen, prng_state *prng); + extern const struct _prng_descriptor fortuna_desc; +#endif + #ifdef RC4 int rc4_start(prng_state *prng); int rc4_add_entropy(const unsigned char *buf, unsigned long len, prng_state *prng); int rc4_ready(prng_state *prng); unsigned long rc4_read(unsigned char *buf, unsigned long len, prng_state *prng); + void rc4_done(prng_state *prng); + int rc4_export(unsigned char *out, unsigned long *outlen, prng_state *prng); + int rc4_import(const unsigned char *in, unsigned long inlen, prng_state *prng); extern const struct _prng_descriptor rc4_desc; #endif @@ -44,6 +80,9 @@ extern struct _prng_descriptor { int sprng_add_entropy(const unsigned char *buf, unsigned long len, prng_state *prng); int sprng_ready(prng_state *prng); unsigned long sprng_read(unsigned char *buf, unsigned long len, prng_state *prng); + void sprng_done(prng_state *prng); + int sprng_export(unsigned char *out, unsigned long *outlen, prng_state *prng); + int sprng_import(const unsigned char *in, unsigned long inlen, prng_state *prng); extern const struct _prng_descriptor sprng_desc; #endif diff --git a/pkcs_5_2.c b/pkcs_5_2.c index 7bc2786..a58994f 100644 --- a/pkcs_5_2.c +++ b/pkcs_5_2.c @@ -52,7 +52,7 @@ int pkcs_5_alg2(const unsigned char *password, unsigned long password_len, stored = 0; while (left != 0) { /* process block number blkno */ - zeromem(buf, MAXBLOCKSIZE*2); + zeromem(buf[0], MAXBLOCKSIZE*2); /* store current block number and increment for next pass */ STORE32H(blkno, buf[1]); @@ -75,7 +75,7 @@ int pkcs_5_alg2(const unsigned char *password, unsigned long 
password_len, /* now compute repeated and XOR it in buf[1] */ XMEMCPY(buf[1], buf[0], x); - for (itts = 2; itts < iteration_count; ++itts) { + for (itts = 1; itts < iteration_count; ++itts) { if ((err = hmac_memory(hash_idx, password, password_len, buf[0], x, buf[0], &x)) != CRYPT_OK) { goto __ERR; } diff --git a/rc2.c b/rc2.c index a8f1d2d..f3f8c31 100644 --- a/rc2.c +++ b/rc2.c @@ -33,17 +33,8 @@ const struct _cipher_descriptor rc2_desc = { &rc2_keysize }; - -/**********************************************************************\ -* Expand a variable-length user key (between 1 and 128 bytes) to a * -* 64-short working rc2 key, of at most "bits" effective key bits. * -* The effective key bits parameter looks like an export control hack. * -* For normal use, it should always be set to 1024. For convenience, * -* zero is accepted as an alias for 1024. * -\**********************************************************************/ - - /* 256-entry permutation table, probably derived somehow from pi */ - static const unsigned char permute[256] = { +/* 256-entry permutation table, probably derived somehow from pi */ +static const unsigned char permute[256] = { 217,120,249,196, 25,221,181,237, 40,233,253,121, 74,160,216,157, 198,126, 55,131, 43,118, 83,142, 98, 76,100,136, 68,139,251,162, 23,154, 89,245,135,179, 79, 19, 97, 69,109,141, 9,129,125, 50, @@ -60,7 +51,7 @@ const struct _cipher_descriptor rc2_desc = { 211, 0,230,207,225,158,168, 44, 99, 22, 1, 63, 88,226,137,169, 13, 56, 52, 27,171, 51,255,176,187, 72, 12, 95,185,177,205, 46, 197,243,219, 71,229,165,156,119, 10,166, 32,104,254,127,193,173 - }; +}; int rc2_setup(const unsigned char *key, int keylen, int rounds, symmetric_key *skey) { @@ -87,24 +78,23 @@ int rc2_setup(const unsigned char *key, int keylen, int rounds, symmetric_key *s /* Phase 1: Expand input key to 128 bytes */ if (keylen < 128) { for (i = keylen; i < 128; i++) { - tmp[i] = permute[(int)((tmp[i - 1] + tmp[i - keylen]) & 255)]; + tmp[i] = 
permute[(tmp[i - 1] + tmp[i - keylen]) & 255]; } } /* Phase 2 - reduce effective key size to "bits" */ - bits = keylen*8; + bits = keylen<<3; T8 = (unsigned)(bits+7)>>3; TM = (255 >> (unsigned)(7 & -bits)); - tmp[128 - T8] = permute[(int)(tmp[128 - T8] & TM)]; + tmp[128 - T8] = permute[tmp[128 - T8] & TM]; for (i = 127 - T8; i >= 0; i--) { - tmp[i] = permute[(int)(tmp[i + 1] ^ tmp[i + T8])]; + tmp[i] = permute[tmp[i + 1] ^ tmp[i + T8]]; } /* Phase 3 - copy to xkey in little-endian order */ - i = 63; - do { + for (i = 0; i < 64; i++) { xkey[i] = (unsigned)tmp[2*i] + ((unsigned)tmp[2*i+1] << 8); - } while (i-- > 0); + } #ifdef CLEAN_STACK zeromem(tmp, sizeof(tmp)); @@ -129,9 +119,9 @@ void rc2_ecb_encrypt( const unsigned char *plain, unsigned *xkey; unsigned x76, x54, x32, x10, i; - _ARGCHK(plain != NULL); + _ARGCHK(plain != NULL); _ARGCHK(cipher != NULL); - _ARGCHK(skey != NULL); + _ARGCHK(skey != NULL); xkey = skey->rc2.xkey; @@ -142,16 +132,16 @@ void rc2_ecb_encrypt( const unsigned char *plain, for (i = 0; i < 16; i++) { x10 = (x10 + (x32 & ~x76) + (x54 & x76) + xkey[4*i+0]) & 0xFFFF; - x10 = ((x10 << 1) | (x10 >> 15)) & 0xFFFF; + x10 = ((x10 << 1) | (x10 >> 15)); x32 = (x32 + (x54 & ~x10) + (x76 & x10) + xkey[4*i+1]) & 0xFFFF; - x32 = ((x32 << 2) | (x32 >> 14)) & 0xFFFF; + x32 = ((x32 << 2) | (x32 >> 14)); x54 = (x54 + (x76 & ~x32) + (x10 & x32) + xkey[4*i+2]) & 0xFFFF; - x54 = ((x54 << 3) | (x54 >> 13)) & 0xFFFF; + x54 = ((x54 << 3) | (x54 >> 13)); x76 = (x76 + (x10 & ~x54) + (x32 & x54) + xkey[4*i+3]) & 0xFFFF; - x76 = ((x76 << 5) | (x76 >> 11)) & 0xFFFF; + x76 = ((x76 << 5) | (x76 >> 11)); if (i == 4 || i == 10) { x10 = (x10 + xkey[x76 & 63]) & 0xFFFF; @@ -199,9 +189,9 @@ void rc2_ecb_decrypt( const unsigned char *cipher, unsigned *xkey; int i; - _ARGCHK(plain != NULL); + _ARGCHK(plain != NULL); _ARGCHK(cipher != NULL); - _ARGCHK(skey != NULL); + _ARGCHK(skey != NULL); xkey = skey->rc2.xkey; @@ -218,16 +208,16 @@ void rc2_ecb_decrypt( const unsigned char 
*cipher, x10 = (x10 - xkey[x76 & 63]) & 0xFFFF; } - x76 = ((x76 << 11) | (x76 >> 5)) & 0xFFFF; + x76 = ((x76 << 11) | (x76 >> 5)); x76 = (x76 - ((x10 & ~x54) + (x32 & x54) + xkey[4*i+3])) & 0xFFFF; - x54 = ((x54 << 13) | (x54 >> 3)) & 0xFFFF; + x54 = ((x54 << 13) | (x54 >> 3)); x54 = (x54 - ((x76 & ~x32) + (x10 & x32) + xkey[4*i+2])) & 0xFFFF; - x32 = ((x32 << 14) | (x32 >> 2)) & 0xFFFF; + x32 = ((x32 << 14) | (x32 >> 2)); x32 = (x32 - ((x54 & ~x10) + (x76 & x10) + xkey[4*i+1])) & 0xFFFF; - x10 = ((x10 << 15) | (x10 >> 1)) & 0xFFFF; + x10 = ((x10 << 15) | (x10 >> 1)); x10 = (x10 - ((x32 & ~x76) + (x54 & x76) + xkey[4*i+0])) & 0xFFFF; } diff --git a/rc4.c b/rc4.c index e218f9b..b340204 100644 --- a/rc4.c +++ b/rc4.c @@ -18,7 +18,10 @@ const struct _prng_descriptor rc4_desc = &rc4_start, &rc4_add_entropy, &rc4_ready, - &rc4_read + &rc4_read, + &rc4_done, + &rc4_export, + &rc4_import }; int rc4_start(prng_state *prng) @@ -96,12 +99,31 @@ unsigned long rc4_read(unsigned char *buf, unsigned long len, prng_state *prng) y = (y + s[x]) & 255; tmp = s[x]; s[x] = s[y]; s[y] = tmp; tmp = (s[x] + s[y]) & 255; - *buf++ ^= s[tmp]; + *buf++ = s[tmp]; } prng->rc4.x = x; prng->rc4.y = y; return n; } +void rc4_done(prng_state *prng) +{ + _ARGCHK(prng != NULL); +} + +int rc4_export(unsigned char *out, unsigned long *outlen, prng_state *prng) +{ + _ARGCHK(outlen != NULL); + + *outlen = 0; + return CRYPT_OK; +} + +int rc4_import(const unsigned char *in, unsigned long inlen, prng_state *prng) +{ + return CRYPT_OK; +} + + #endif diff --git a/rsa_decrypt_key.c b/rsa_decrypt_key.c index d6b6553..ccdc8b1 100644 --- a/rsa_decrypt_key.c +++ b/rsa_decrypt_key.c @@ -30,7 +30,10 @@ int rsa_decrypt_key(const unsigned char *in, unsigned long inlen, _ARGCHK(key != NULL); _ARGCHK(res != NULL); - /* valid hash ? */ + /* valid hash/prng ? 
*/ + if ((err = prng_is_valid(prng_idx)) != CRYPT_OK) { + return err; + } if ((err = hash_is_valid(hash_idx)) != CRYPT_OK) { return err; } diff --git a/rsa_v15_decrypt_key.c b/rsa_v15_decrypt_key.c new file mode 100644 index 0000000..fa187ba --- /dev/null +++ b/rsa_v15_decrypt_key.c @@ -0,0 +1,63 @@ + /* LibTomCrypt, modular cryptographic library -- Tom St Denis + * + * LibTomCrypt is a library that provides various cryptographic + * algorithms in a highly modular and flexible manner. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://libtomcrypt.org + */ + +#include "mycrypt.h" + +#ifdef MRSA + +/* decrypt then PKCS #1 v1.5 depad */ +int rsa_v15_decrypt_key(const unsigned char *in, unsigned long inlen, + unsigned char *outkey, unsigned long keylen, + prng_state *prng, int prng_idx, + int *res, rsa_key *key) +{ + unsigned long modulus_bitlen, modulus_bytelen, x; + int err; + unsigned char *tmp; + + _ARGCHK(outkey != NULL); + _ARGCHK(key != NULL); + _ARGCHK(res != NULL); + + /* valid prng ? 
*/ + if ((err = prng_is_valid(prng_idx)) != CRYPT_OK) { + return err; + } + + /* get modulus len in bits */ + modulus_bitlen = mp_count_bits(&(key->N)); + + /* outlen must be at least the size of the modulus */ + modulus_bytelen = mp_unsigned_bin_size(&(key->N)); + if (modulus_bytelen != inlen) { + return CRYPT_INVALID_PACKET; + } + + /* allocate ram */ + tmp = XMALLOC(inlen); + if (tmp == NULL) { + return CRYPT_MEM; + } + + /* rsa decode the packet */ + x = inlen; + if ((err = rsa_exptmod(in, inlen, tmp, &x, PK_PRIVATE, prng, prng_idx, key)) != CRYPT_OK) { + XFREE(tmp); + return err; + } + + /* PKCS #1 v1.5 depad */ + err = pkcs_1_v15_es_decode(tmp, x, modulus_bitlen, outkey, keylen, res); + XFREE(tmp); + return err; +} + +#endif diff --git a/rsa_v15_encrypt_key.c b/rsa_v15_encrypt_key.c new file mode 100644 index 0000000..3724a72 --- /dev/null +++ b/rsa_v15_encrypt_key.c @@ -0,0 +1,54 @@ +/* LibTomCrypt, modular cryptographic library -- Tom St Denis + * + * LibTomCrypt is a library that provides various cryptographic + * algorithms in a highly modular and flexible manner. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://libtomcrypt.org + */ + +#include "mycrypt.h" + +#ifdef MRSA + +/* PKCS #1 v1.5 pad then encrypt */ +int rsa_v15_encrypt_key(const unsigned char *inkey, unsigned long inlen, + unsigned char *outkey, unsigned long *outlen, + prng_state *prng, int prng_idx, + rsa_key *key) +{ + unsigned long modulus_bitlen, modulus_bytelen, x; + int err; + + _ARGCHK(inkey != NULL); + _ARGCHK(outkey != NULL); + _ARGCHK(outlen != NULL); + _ARGCHK(key != NULL); + + /* valid prng? 
*/ + if ((err = prng_is_valid(prng_idx)) != CRYPT_OK) { + return err; + } + + /* get modulus len in bits */ + modulus_bitlen = mp_count_bits(&(key->N)); + + /* outlen must be at least the size of the modulus */ + modulus_bytelen = mp_unsigned_bin_size(&(key->N)); + if (modulus_bytelen > *outlen) { + return CRYPT_BUFFER_OVERFLOW; + } + + /* pad it */ + x = *outlen; + if ((err = pkcs_1_v15_es_encode(inkey, inlen, modulus_bitlen, prng, prng_idx, outkey, &x)) != CRYPT_OK) { + return err; + } + + /* encrypt it */ + return rsa_exptmod(outkey, x, outkey, outlen, PK_PUBLIC, prng, prng_idx, key); +} + +#endif diff --git a/rsa_v15_sign_hash.c b/rsa_v15_sign_hash.c new file mode 100644 index 0000000..0a3da2a --- /dev/null +++ b/rsa_v15_sign_hash.c @@ -0,0 +1,57 @@ +/* LibTomCrypt, modular cryptographic library -- Tom St Denis + * + * LibTomCrypt is a library that provides various cryptographic + * algorithms in a highly modular and flexible manner. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://libtomcrypt.org + */ + +#include "mycrypt.h" + +#ifdef MRSA + +/* PKCS #1 v1.5 pad then sign */ +int rsa_v15_sign_hash(const unsigned char *msghash, unsigned long msghashlen, + unsigned char *sig, unsigned long *siglen, + prng_state *prng, int prng_idx, + int hash_idx, rsa_key *key) +{ + unsigned long modulus_bitlen, modulus_bytelen, x; + int err; + + _ARGCHK(msghash != NULL); + _ARGCHK(sig != NULL); + _ARGCHK(siglen != NULL); + _ARGCHK(key != NULL); + + /* valid prng and hash ? 
*/ + if ((err = prng_is_valid(prng_idx)) != CRYPT_OK) { + return err; + } + if ((err = hash_is_valid(hash_idx)) != CRYPT_OK) { + return err; + } + + /* get modulus len in bits */ + modulus_bitlen = mp_count_bits(&(key->N)); + + /* *siglen must be at least the size of the modulus in bytes */ + modulus_bytelen = mp_unsigned_bin_size(&(key->N)); + if (modulus_bytelen > *siglen) { + return CRYPT_BUFFER_OVERFLOW; + } + + /* PKCS #1 v1.5 pad the hash */ + x = *siglen; + if ((err = pkcs_1_v15_sa_encode(msghash, msghashlen, hash_idx, modulus_bitlen, sig, &x)) != CRYPT_OK) { + return err; + } + + /* RSA sign it (private-key exptmod, in place in sig) */ + return rsa_exptmod(sig, x, sig, siglen, PK_PRIVATE, prng, prng_idx, key); +} + +#endif diff --git a/rsa_v15_verify_hash.c b/rsa_v15_verify_hash.c new file mode 100644 index 0000000..80ca72a --- /dev/null +++ b/rsa_v15_verify_hash.c @@ -0,0 +1,69 @@ +/* LibTomCrypt, modular cryptographic library -- Tom St Denis + * + * LibTomCrypt is a library that provides various cryptographic + * algorithms in a highly modular and flexible manner. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://libtomcrypt.org + */ + +#include "mycrypt.h" + +#ifdef MRSA + +/* RSA de-sign (public-key exptmod) then PKCS #1 v1.5 depad; stat is passed on to pkcs_1_v15_sa_decode */ +int rsa_v15_verify_hash(const unsigned char *sig, unsigned long siglen, + const unsigned char *msghash, unsigned long msghashlen, + prng_state *prng, int prng_idx, + int hash_idx, int *stat, + rsa_key *key) +{ + unsigned long modulus_bitlen, modulus_bytelen, x; + int err; + unsigned char *tmpbuf; + + _ARGCHK(msghash != NULL); + _ARGCHK(sig != NULL); + _ARGCHK(stat != NULL); + _ARGCHK(key != NULL); + + /* valid hash ?
*/ + if ((err = hash_is_valid(hash_idx)) != CRYPT_OK) { + return err; + } + + if ((err = prng_is_valid(prng_idx)) != CRYPT_OK) { + return err; + } + + /* get modulus len in bits */ + modulus_bitlen = mp_count_bits(&(key->N)); + + /* siglen must equal the size of the modulus in bytes */ + modulus_bytelen = mp_unsigned_bin_size(&(key->N)); + if (modulus_bytelen != siglen) { + return CRYPT_INVALID_PACKET; + } + + /* allocate temp buffer for decoded sig */ + tmpbuf = XMALLOC(siglen); + if (tmpbuf == NULL) { + return CRYPT_MEM; + } + + /* RSA decode it with the public key */ + x = siglen; + if ((err = rsa_exptmod(sig, siglen, tmpbuf, &x, PK_PUBLIC, prng, prng_idx, key)) != CRYPT_OK) { + XFREE(tmpbuf); + return err; + } + + /* PKCS #1 v1.5 decode it */ + err = pkcs_1_v15_sa_decode(msghash, msghashlen, tmpbuf, x, hash_idx, modulus_bitlen, stat); + XFREE(tmpbuf); + return err; +} + +#endif diff --git a/sprng.c b/sprng.c index db6e338..f1de461 100644 --- a/sprng.c +++ b/sprng.c @@ -23,7 +23,11 @@ const struct _prng_descriptor sprng_desc = &sprng_start, &sprng_add_entropy, &sprng_ready, - &sprng_read + &sprng_read, + &sprng_done, + &sprng_export, + &sprng_import + }; int sprng_start(prng_state *prng) @@ -47,6 +51,24 @@ unsigned long sprng_read(unsigned char *buf, unsigned long len, prng_state *prng return rng_get_bytes(buf, len, NULL); } +void sprng_done(prng_state *prng) +{ + _ARGCHK(prng != NULL); +} + +int sprng_export(unsigned char *out, unsigned long *outlen, prng_state *prng) +{ + _ARGCHK(outlen != NULL); + + *outlen = 0; + return CRYPT_OK; +} + +int sprng_import(const unsigned char *in, unsigned long inlen, prng_state *prng) +{ + return CRYPT_OK; +} + #endif diff --git a/yarrow.c b/yarrow.c index 4415f78..7354d47 100644 --- a/yarrow.c +++ b/yarrow.c @@ -19,7 +19,10 @@ const struct _prng_descriptor yarrow_desc = &yarrow_start, &yarrow_add_entropy, &yarrow_ready, - &yarrow_read + &yarrow_read, + &yarrow_done, + &yarrow_export, + &yarrow_import }; int yarrow_start(prng_state *prng) @@ -180,5 +183,50 @@
unsigned long yarrow_read(unsigned char *buf, unsigned long len, prng_state *prn return len; } +void yarrow_done(prng_state *prng) +{ + _ARGCHK(prng != NULL); + /* call cipher done when we invent one ;-) */ +} + +int yarrow_export(unsigned char *out, unsigned long *outlen, prng_state *prng) +{ + _ARGCHK(out != NULL); + _ARGCHK(outlen != NULL); + _ARGCHK(prng != NULL); + + /* we'll write 64 bytes for s&g's */ + if (*outlen < 64) { + return CRYPT_BUFFER_OVERFLOW; + } + + if (yarrow_read(out, 64, prng) != 64) { + return CRYPT_ERROR_READPRNG; + } + *outlen = 64; + + return CRYPT_OK; +} + +int yarrow_import(const unsigned char *in, unsigned long inlen, prng_state *prng) +{ + int err; + + _ARGCHK(in != NULL); + _ARGCHK(prng != NULL); + + if (inlen != 64) { /* only the 64-byte blob size written by yarrow_export is accepted */ + return CRYPT_INVALID_ARG; + } + + if ((err = yarrow_start(prng)) != CRYPT_OK) { /* call yarrow_start on the state before feeding it the blob */ + return err; + } + if ((err = yarrow_add_entropy(in, 64, prng)) != CRYPT_OK) { /* prng is already a prng_state *; do not take its address */ + return err; + } + return yarrow_ready(prng); +} + #endif