/* AES implementation by Tom St Denis
 *
 * Derived from the Public Domain source code by
 *
 * ---
 * rijndael-alg-fst.c
 *
 * @version 3.0 (December 2000)
 *
 * Optimised ANSI C code for the Rijndael cipher (now AES)
 *
 * @author Vincent Rijmen
 * @author Antoon Bosselaers
 * @author Paulo Barreto
 * ---
 */

#include "mycrypt.h"

#ifdef RIJNDAEL

/* Cipher descriptors.  "rijndael" and "aes" share the same five entry points
 * and differ only in the name string.
 * NOTE(review): the numeric fields presumably are ID, min key length,
 * max key length, block length and default round count -- confirm against
 * the declaration of struct _cipher_descriptor. */
const struct _cipher_descriptor rijndael_desc =
{
    "rijndael",
    6,
    16, 32, 16, 10,
    &rijndael_setup,
    &rijndael_ecb_encrypt,
    &rijndael_ecb_decrypt,
    &rijndael_test,
    &rijndael_keysize
};

const struct _cipher_descriptor aes_desc =
{
    "aes",
    6,
    16, 32, 16, 10,
    &rijndael_setup,
    &rijndael_ecb_encrypt,
    &rijndael_ecb_decrypt,
    &rijndael_test,
    &rijndael_keysize
};

/* Lookup tables (Te*, Td*, Tks*, rcon) are not defined in this file;
 * presumably they come from this include. */
#include "aes_tab.c"

/**
 * Expand a user key into the encryption (eK) and decryption (dK) schedules.
 *
 * @param key     Key bytes; keylen bytes are read.
 * @param keylen  Key length in bytes; must be 16, 24 or 32.
 * @param rounds  0 to use the default, otherwise it must equal the round
 *                count implied by keylen (10, 12 or 14).
 * @param skey    Receives Nr, eK and dK in skey->rijndael.
 * @return CRYPT_OK on success, CRYPT_INVALID_KEYSIZE for an unsupported
 *         keylen, CRYPT_INVALID_ROUNDS for a mismatching rounds value.
 */
int rijndael_setup(const unsigned char *key, int keylen, int rounds, symmetric_key *skey)
{
    int i = 0, j;
    unsigned long temp, *rk, *rrk;

    _ARGCHK(key != NULL);
    _ARGCHK(skey != NULL);

    if (keylen != 16 && keylen != 24 && keylen != 32) {
        return CRYPT_INVALID_KEYSIZE;
    }

    /* 16/24/32-byte keys give 10/12/14 rounds */
    if (rounds != 0 && rounds != (10 + ((keylen/8)-2)*2)) {
        return CRYPT_INVALID_ROUNDS;
    }

    skey->rijndael.Nr = 10 + ((keylen/8)-2)*2;

    /* setup the forward key */
    rk = skey->rijndael.eK;
    LOAD32H(rk[0], key     );
    LOAD32H(rk[1], key +  4);
    LOAD32H(rk[2], key +  8);
    LOAD32H(rk[3], key + 12);

    /* In each branch j ends up as the total number of round-key words,
     * i.e. 4 * (Nr + 1); it is used below to locate the last round key. */
    if (keylen == 16) {
        j = 44;
        for (;;) {
            temp  = rk[3];
            rk[4] = rk[0] ^
                (Te4_3[byte(temp, 2)]) ^
                (Te4_2[byte(temp, 1)]) ^
                (Te4_1[byte(temp, 0)]) ^
                (Te4_0[byte(temp, 3)]) ^
                rcon[i];
            rk[5] = rk[1] ^ rk[4];
            rk[6] = rk[2] ^ rk[5];
            rk[7] = rk[3] ^ rk[6];
            if (++i == 10) {
                break;
            }
            rk += 4;
        }
    } else if (keylen == 24) {
        j = 52;
        LOAD32H(rk[4], key + 16);
        LOAD32H(rk[5], key + 20);
        for (;;) {
#ifdef _MSC_VER
            temp = skey->rijndael.eK[rk - skey->rijndael.eK + 5];
#else
            temp = rk[5];
#endif
            rk[ 6] = rk[ 0] ^
                (Te4_3[byte(temp, 2)]) ^
                (Te4_2[byte(temp, 1)]) ^
                (Te4_1[byte(temp, 0)]) ^
                (Te4_0[byte(temp, 3)]) ^
                rcon[i];
            rk[ 7] = rk[ 1] ^ rk[ 6];
            rk[ 8] = rk[ 2] ^ rk[ 7];
            rk[ 9] = rk[ 3] ^ rk[ 8];
            /* the final pass only needs four more words, so stop here */
            if (++i == 8) {
                break;
            }
            rk[10] = rk[ 4] ^ rk[ 9];
            rk[11] = rk[ 5] ^ rk[10];
            rk += 6;
        }
    } else if (keylen == 32) {
        j = 60;
        LOAD32H(rk[4], key + 16);
        LOAD32H(rk[5], key + 20);
        LOAD32H(rk[6], key + 24);
        LOAD32H(rk[7], key + 28);
        for (;;) {
#ifdef _MSC_VER
            temp = skey->rijndael.eK[rk - skey->rijndael.eK + 7];
#else
            temp = rk[7];
#endif
            rk[ 8] = rk[ 0] ^
                (Te4_3[byte(temp, 2)]) ^
                (Te4_2[byte(temp, 1)]) ^
                (Te4_1[byte(temp, 0)]) ^
                (Te4_0[byte(temp, 3)]) ^
                rcon[i];
            rk[ 9] = rk[ 1] ^ rk[ 8];
            rk[10] = rk[ 2] ^ rk[ 9];
            rk[11] = rk[ 3] ^ rk[10];
            if (++i == 7) {
                break;
            }
            /* second half of the 8-word step: byte indices are not rotated
             * here and no rcon term is added */
            temp = rk[11];
            rk[12] = rk[ 4] ^
                (Te4_3[byte(temp, 3)]) ^
                (Te4_2[byte(temp, 2)]) ^
                (Te4_1[byte(temp, 1)]) ^
                (Te4_0[byte(temp, 0)]);
            rk[13] = rk[ 5] ^ rk[12];
            rk[14] = rk[ 6] ^ rk[13];
            rk[15] = rk[ 7] ^ rk[14];
            rk += 8;
        }
    } else {
        /* this can't happen */
        j = 4;
    }

    /* setup the inverse key now: dK is walked forwards while eK is walked
     * backwards starting at its last four-word round key */
    rk  = skey->rijndael.dK;
    rrk = skey->rijndael.eK + j - 4;

    /* apply the inverse MixColumn transform to all round keys but the first and the last: */
    /* copy first */
    *rk++ = *rrk++;
    *rk++ = *rrk++;
    *rk++ = *rrk++;
    *rk   = *rrk;
    rk  -= 3;
    rrk -= 3;

    for (i = 1; i < skey->rijndael.Nr; i++) {
        rrk -= 4;
        rk  += 4;
#ifdef SMALL_CODE
        temp = rrk[0];
        rk[0] =
            Td0[255 & Te4[byte(temp, 3)]] ^
            Td1[255 & Te4[byte(temp, 2)]] ^
            Td2[255 & Te4[byte(temp, 1)]] ^
            Td3[255 & Te4[byte(temp, 0)]];
        temp = rrk[1];
        rk[1] =
            Td0[255 & Te4[byte(temp, 3)]] ^
            Td1[255 & Te4[byte(temp, 2)]] ^
            Td2[255 & Te4[byte(temp, 1)]] ^
            Td3[255 & Te4[byte(temp, 0)]];
        temp = rrk[2];
        rk[2] =
            Td0[255 & Te4[byte(temp, 3)]] ^
            Td1[255 & Te4[byte(temp, 2)]] ^
            Td2[255 & Te4[byte(temp, 1)]] ^
            Td3[255 & Te4[byte(temp, 0)]];
        temp = rrk[3];
        rk[3] =
            Td0[255 & Te4[byte(temp, 3)]] ^
            Td1[255 & Te4[byte(temp, 2)]] ^
            Td2[255 & Te4[byte(temp, 1)]] ^
            Td3[255 & Te4[byte(temp, 0)]];
#else
        temp = rrk[0];
        rk[0] =
            Tks0[byte(temp, 3)] ^
            Tks1[byte(temp, 2)] ^
            Tks2[byte(temp, 1)] ^
            Tks3[byte(temp, 0)];
        temp = rrk[1];
        rk[1] =
            Tks0[byte(temp, 3)] ^
            Tks1[byte(temp, 2)] ^
            Tks2[byte(temp, 1)] ^
            Tks3[byte(temp, 0)];
        temp = rrk[2];
        rk[2] =
            Tks0[byte(temp, 3)] ^
            Tks1[byte(temp, 2)] ^
            Tks2[byte(temp, 1)] ^
            Tks3[byte(temp, 0)];
        temp = rrk[3];
        rk[3] =
            Tks0[byte(temp, 3)] ^
            Tks1[byte(temp, 2)] ^
            Tks2[byte(temp, 1)] ^
            Tks3[byte(temp, 0)];
#endif
    }

    /* copy last */
    rrk -= 4;
    rk  += 4;
    *rk++ = *rrk++;
    *rk++ = *rrk++;
    *rk++ = *rrk++;
    *rk   = *rrk;

    return CRYPT_OK;
}

/**
 * Encrypt one 16-byte block with the schedule in skey->rijndael.eK.
 *
 * @param pt    16 bytes of plaintext (read).
 * @param ct    16 bytes of ciphertext (written).
 * @param skey  Key schedule previously filled in by rijndael_setup().
 */
void rijndael_ecb_encrypt(const unsigned char *pt, unsigned char *ct, symmetric_key *skey)
{
    unsigned long s0, s1, s2, s3, t0, t1, t2, t3, *rk;
    int Nr, r;

    _ARGCHK(pt != NULL);
    _ARGCHK(ct != NULL);
    _ARGCHK(skey != NULL);

    Nr = skey->rijndael.Nr;
    rk = skey->rijndael.eK;

    /*
     * map byte array block to cipher state
     * and add initial round key:
     */
    LOAD32H(s0, pt      ); s0 ^= rk[0];
    LOAD32H(s1, pt  +  4); s1 ^= rk[1];
    LOAD32H(s2, pt  +  8); s2 ^= rk[2];
    LOAD32H(s3, pt  + 12); s3 ^= rk[3];

    /*
     * Nr - 1 full rounds:
     * each pass of the loop performs two rounds (s -> t, then t -> s); the
     * break in the middle of the last pass leaves the state in t0..t3.
     */
    r = Nr >> 1;
    for (;;) {

/* Both of these blocks are equivalent except the top is friendlier for x86 processors */
#if defined(__GNUC__)
        t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
        t1 ^= Te3[byte(s0, 0)];
        t2 ^= Te2[byte(s0, 1)];
        t3 ^= Te1[byte(s0, 2)];
        t0 ^= Te0[byte(s0, 3)];

        t2 ^= Te3[byte(s1, 0)];
        t3 ^= Te2[byte(s1, 1)];
        t0 ^= Te1[byte(s1, 2)];
        t1 ^= Te0[byte(s1, 3)];

        t3 ^= Te3[byte(s2, 0)];
        t0 ^= Te2[byte(s2, 1)];
        t1 ^= Te1[byte(s2, 2)];
        t2 ^= Te0[byte(s2, 3)];

        t0 ^= Te3[byte(s3, 0)];
        t1 ^= Te2[byte(s3, 1)];
        t2 ^= Te1[byte(s3, 2)];
        t3 ^= Te0[byte(s3, 3)];
#else
        t0 =
            Te0[byte(s0, 3)] ^
            Te1[byte(s1, 2)] ^
            Te2[byte(s2, 1)] ^
            Te3[byte(s3, 0)] ^
            rk[4];
        t1 =
            Te0[byte(s1, 3)] ^
            Te1[byte(s2, 2)] ^
            Te2[byte(s3, 1)] ^
            Te3[byte(s0, 0)] ^
            rk[5];
        t2 =
            Te0[byte(s2, 3)] ^
            Te1[byte(s3, 2)] ^
            Te2[byte(s0, 1)] ^
            Te3[byte(s1, 0)] ^
            rk[6];
        t3 =
            Te0[byte(s3, 3)] ^
            Te1[byte(s0, 2)] ^
            Te2[byte(s1, 1)] ^
            Te3[byte(s2, 0)] ^
            rk[7];
#endif

        rk += 8;
        if (--r == 0) {
            break;
        }

/* this second half optimization actually makes it slower on the Athlon, use with caution. */
#if 0
        s1 = rk[1]; s2 = rk[2]; s3 = rk[3]; s0 = rk[0];
        s1 ^= Te3[byte(t0, 0)];
        s2 ^= Te2[byte(t0, 1)];
        s3 ^= Te1[byte(t0, 2)];
        s0 ^= Te0[byte(t0, 3)];

        s2 ^= Te3[byte(t1, 0)];
        s3 ^= Te2[byte(t1, 1)];
        s0 ^= Te1[byte(t1, 2)];
        s1 ^= Te0[byte(t1, 3)];

        s3 ^= Te3[byte(t2, 0)];
        s0 ^= Te2[byte(t2, 1)];
        s1 ^= Te1[byte(t2, 2)];
        s2 ^= Te0[byte(t2, 3)];

        s0 ^= Te3[byte(t3, 0)];
        s1 ^= Te2[byte(t3, 1)];
        s2 ^= Te1[byte(t3, 2)];
        s3 ^= Te0[byte(t3, 3)];
#else
        s0 =
            Te0[byte(t0, 3)] ^
            Te1[byte(t1, 2)] ^
            Te2[byte(t2, 1)] ^
            Te3[byte(t3, 0)] ^
            rk[0];
        s1 =
            Te0[byte(t1, 3)] ^
            Te1[byte(t2, 2)] ^
            Te2[byte(t3, 1)] ^
            Te3[byte(t0, 0)] ^
            rk[1];
        s2 =
            Te0[byte(t2, 3)] ^
            Te1[byte(t3, 2)] ^
            Te2[byte(t0, 1)] ^
            Te3[byte(t1, 0)] ^
            rk[2];
        s3 =
            Te0[byte(t3, 3)] ^
            Te1[byte(t0, 2)] ^
            Te2[byte(t1, 1)] ^
            Te3[byte(t2, 0)] ^
            rk[3];
#endif
    }

    /*
     * apply last round and
     * map cipher state to byte array block:
     *
     * The top-byte index is masked like the other three: unsigned long may
     * be wider than 32 bits, and masking keeps the table index within
     * 0..255 without relying on the upper bits of the state being clear.
     */
    s0 =
        (Te4_3[(t0 >> 24) & 0xff]) ^
        (Te4_2[(t1 >> 16) & 0xff]) ^
        (Te4_1[(t2 >>  8) & 0xff]) ^
        (Te4_0[(t3      ) & 0xff]) ^
        rk[0];
    STORE32H(s0, ct);
    s1 =
        (Te4_3[(t1 >> 24) & 0xff]) ^
        (Te4_2[(t2 >> 16) & 0xff]) ^
        (Te4_1[(t3 >>  8) & 0xff]) ^
        (Te4_0[(t0      ) & 0xff]) ^
        rk[1];
    STORE32H(s1, ct+4);
    s2 =
        (Te4_3[(t2 >> 24) & 0xff]) ^
        (Te4_2[(t3 >> 16) & 0xff]) ^
        (Te4_1[(t0 >>  8) & 0xff]) ^
        (Te4_0[(t1      ) & 0xff]) ^
        rk[2];
    STORE32H(s2, ct+8);
    s3 =
        (Te4_3[(t3 >> 24) & 0xff]) ^
        (Te4_2[(t0 >> 16) & 0xff]) ^
        (Te4_1[(t1 >>  8) & 0xff]) ^
        (Te4_0[(t2      ) & 0xff]) ^
        rk[3];
    STORE32H(s3, ct+12);
}

/**
 * Decrypt one 16-byte block with the schedule in skey->rijndael.dK.
 *
 * @param ct    16 bytes of ciphertext (read).
 * @param pt    16 bytes of plaintext (written).
 * @param skey  Key schedule previously filled in by rijndael_setup().
 */
void rijndael_ecb_decrypt(const unsigned char *ct, unsigned char *pt, symmetric_key *skey)
{
    unsigned long s0, s1, s2, s3, t0, t1, t2, t3, *rk;
    int Nr, r;

    _ARGCHK(pt != NULL);
    _ARGCHK(ct != NULL);
    _ARGCHK(skey != NULL);

    Nr = skey->rijndael.Nr;
    rk = skey->rijndael.dK;

    /*
     * map byte array block to cipher state
     * and add initial round key:
     */
    LOAD32H(s0, ct      ); s0 ^= rk[0];
    LOAD32H(s1, ct  +  4); s1 ^= rk[1];
    LOAD32H(s2, ct  +  8); s2 ^= rk[2];
    LOAD32H(s3, ct  + 12); s3 ^= rk[3];

    /*
     * Nr - 1 full rounds:
     * two rounds per loop pass, leaving the state in t0..t3 on exit.
     */
    r = Nr >> 1;
    for (;;) {
        t0 =
            Td0[byte(s0, 3)] ^
            Td1[byte(s3, 2)] ^
            Td2[byte(s2, 1)] ^
            Td3[byte(s1, 0)] ^
            rk[4];
        t1 =
            Td0[byte(s1, 3)] ^
            Td1[byte(s0, 2)] ^
            Td2[byte(s3, 1)] ^
            Td3[byte(s2, 0)] ^
            rk[5];
        t2 =
            Td0[byte(s2, 3)] ^
            Td1[byte(s1, 2)] ^
            Td2[byte(s0, 1)] ^
            Td3[byte(s3, 0)] ^
            rk[6];
        t3 =
            Td0[byte(s3, 3)] ^
            Td1[byte(s2, 2)] ^
            Td2[byte(s1, 1)] ^
            Td3[byte(s0, 0)] ^
            rk[7];

        rk += 8;
        if (--r == 0) {
            break;
        }

        s0 =
            Td0[byte(t0, 3)] ^
            Td1[byte(t3, 2)] ^
            Td2[byte(t2, 1)] ^
            Td3[byte(t1, 0)] ^
            rk[0];
        s1 =
            Td0[byte(t1, 3)] ^
            Td1[byte(t0, 2)] ^
            Td2[byte(t3, 1)] ^
            Td3[byte(t2, 0)] ^
            rk[1];
        s2 =
            Td0[byte(t2, 3)] ^
            Td1[byte(t1, 2)] ^
            Td2[byte(t0, 1)] ^
            Td3[byte(t3, 0)] ^
            rk[2];
        s3 =
            Td0[byte(t3, 3)] ^
            Td1[byte(t2, 2)] ^
            Td2[byte(t1, 1)] ^
            Td3[byte(t0, 0)] ^
            rk[3];
    }

    /*
     * apply last round and
     * map cipher state to byte array block:
     *
     * As in the encrypt path, the top-byte index is masked so the Td4
     * lookup stays within 0..255 even if unsigned long is wider than 32 bits.
     */
    s0 =
        (Td4[(t0 >> 24) & 0xff] & 0xff000000) ^
        (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
        (Td4[(t2 >>  8) & 0xff] & 0x0000ff00) ^
        (Td4[(t1      ) & 0xff] & 0x000000ff) ^
        rk[0];
    STORE32H(s0, pt);
    s1 =
        (Td4[(t1 >> 24) & 0xff] & 0xff000000) ^
        (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
        (Td4[(t3 >>  8) & 0xff] & 0x0000ff00) ^
        (Td4[(t2      ) & 0xff] & 0x000000ff) ^
        rk[1];
    STORE32H(s1, pt+4);
    s2 =
        (Td4[(t2 >> 24) & 0xff] & 0xff000000) ^
        (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
        (Td4[(t0 >>  8) & 0xff] & 0x0000ff00) ^
        (Td4[(t3      ) & 0xff] & 0x000000ff) ^
        rk[2];
    STORE32H(s2, pt+8);
    s3 =
        (Td4[(t3 >> 24) & 0xff] & 0xff000000) ^
        (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
        (Td4[(t1 >>  8) & 0xff] & 0x0000ff00) ^
        (Td4[(t0      ) & 0xff] & 0x000000ff) ^
        rk[3];
    STORE32H(s3, pt+12);
}

/**
 * Self-test: for 16-, 24- and 32-byte keys, encrypt a known plaintext,
 * decrypt the result, and compare both against the stored vectors.
 *
 * @return CRYPT_NOP when built without LTC_TEST; otherwise CRYPT_OK when
 *         every vector matches, CRYPT_FAIL_TESTVECTOR on the first mismatch,
 *         or the error returned by rijndael_setup().
 */
int rijndael_test(void)
{
#ifndef LTC_TEST
    return CRYPT_NOP;
#else
    int err;
    static const struct {
        int keylen;
        unsigned char key[32], pt[16], ct[16];
    } tests[] = {
    { 16,
      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
      { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
        0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
      { 0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30,
        0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a }
    }, {
      24,
      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 },
      { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
        0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
      { 0xdd, 0xa9, 0x7c, 0xa4, 0x86, 0x4c, 0xdf, 0xe0,
        0x6e, 0xaf, 0x70, 0xa0, 0xec, 0x0d, 0x71, 0x91 }
    }, {
      32,
      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
        0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
      { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
        0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
      { 0x8e, 0xa2, 0xb7, 0xca, 0x51, 0x67, 0x45, 0xbf,
        0xea, 0xfc, 0x49, 0x90, 0x4b, 0x49, 0x60, 0x89 }
    }
    };

    symmetric_key key;
    unsigned char tmp[2][16];   /* tmp[0] = ciphertext, tmp[1] = round-tripped plaintext */
    int i;

    for (i = 0; i < (int)(sizeof(tests)/sizeof(tests[0])); i++) {
        zeromem(&key, sizeof(key));
        if ((err = rijndael_setup(tests[i].key, tests[i].keylen, 0, &key)) != CRYPT_OK) {
            return err;
        }

        rijndael_ecb_encrypt(tests[i].pt, tmp[0], &key);
        rijndael_ecb_decrypt(tmp[0], tmp[1], &key);
        if (memcmp(tmp[0], tests[i].ct, 16) || memcmp(tmp[1], tests[i].pt, 16)) {
#if 0
            printf("\n\nTest %d failed\n", i);
            if (memcmp(tmp[0], tests[i].ct, 16)) {
                printf("CT: ");
                for (i = 0; i < 16; i++) {
                    printf("%02x ", tmp[0][i]);
                }
                printf("\n");
            } else {
                printf("PT: ");
                for (i = 0; i < 16; i++) {
                    printf("%02x ", tmp[1][i]);
                }
                printf("\n");
            }
#endif
            return CRYPT_FAIL_TESTVECTOR;
        }
    }
    return CRYPT_OK;
#endif
}

/**
 * Round a requested key size down to the nearest supported size.
 *
 * @param desired_keysize  In: requested size in bytes.  Out: 16, 24 or 32
 *                         (left untouched when the request is below 16).
 * @return CRYPT_OK, or CRYPT_INVALID_KEYSIZE when the request is below 16.
 */
int rijndael_keysize(int *desired_keysize)
{
    _ARGCHK(desired_keysize != NULL);

    if (*desired_keysize < 16) {
        return CRYPT_INVALID_KEYSIZE;
    }
    if (*desired_keysize < 24) {
        *desired_keysize = 16;
        return CRYPT_OK;
    } else if (*desired_keysize < 32) {
        *desired_keysize = 24;
        return CRYPT_OK;
    } else {
        *desired_keysize = 32;
        return CRYPT_OK;
    }
}

#endif