tomcrypt/aes.c
2010-06-16 12:38:00 +02:00

582 lines
16 KiB
C

/* AES implementation by Tom St Denis
*
* Derived from the Public Domain source code by
---
* rijndael-alg-fst.c
*
* @version 3.0 (December 2000)
*
* Optimised ANSI C code for the Rijndael cipher (now AES)
*
* @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
* @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
* @author Paulo Barreto <paulo.barreto@terra.com.br>
---
*/
#include "mycrypt.h"
#ifdef RIJNDAEL
/* Cipher descriptor that exposes this implementation under the name "rijndael".
 *
 * NOTE(review): the field order is dictated by struct _cipher_descriptor in
 * mycrypt.h, which is not visible from this file.  The per-field notes on the
 * numeric entries below are assumptions -- confirm against the header.
 */
const struct _cipher_descriptor rijndael_desc =
{
    "rijndael",               /* cipher name */
    6,                        /* presumably the cipher ID (aes_desc uses the same value) */
    16,                       /* presumably the minimum key length in bytes */
    32,                       /* presumably the maximum key length in bytes */
    16,                       /* presumably the block length in bytes */
    10,                       /* presumably the default number of rounds */
    rijndael_setup,           /* key schedule */
    rijndael_ecb_encrypt,     /* single-block encrypt */
    rijndael_ecb_decrypt,     /* single-block decrypt */
    rijndael_test,            /* self-test */
    rijndael_keysize          /* key-size rounding helper */
};
/* Cipher descriptor that exposes the same callbacks as rijndael_desc under
 * the name "aes".
 *
 * NOTE(review): the field order is dictated by struct _cipher_descriptor in
 * mycrypt.h, which is not visible from this file.  The per-field notes on the
 * numeric entries below are assumptions -- confirm against the header.
 */
const struct _cipher_descriptor aes_desc =
{
    "aes",                    /* cipher name */
    6,                        /* presumably the cipher ID (rijndael_desc uses the same value) */
    16,                       /* presumably the minimum key length in bytes */
    32,                       /* presumably the maximum key length in bytes */
    16,                       /* presumably the block length in bytes */
    10,                       /* presumably the default number of rounds */
    rijndael_setup,           /* key schedule */
    rijndael_ecb_encrypt,     /* single-block encrypt */
    rijndael_ecb_decrypt,     /* single-block decrypt */
    rijndael_test,            /* self-test */
    rijndael_keysize          /* key-size rounding helper */
};
#include "aes_tab.c"
/* Build the Rijndael/AES key schedules for one key.
 *
 * Fills skey->rijndael with the round count (Nr), the forward round keys
 * (eK) and the round keys consumed by rijndael_ecb_decrypt (dK).
 *
 *   key     - key material; keylen bytes are read from it
 *   keylen  - key length in bytes; only 16, 24 and 32 are accepted
 *   rounds  - 0 to request the default, otherwise it must equal the round
 *             count implied by keylen (10, 12 or 14)
 *   skey    - destination for the schedules
 *
 * Returns CRYPT_OK on success, CRYPT_INVALID_KEYSIZE for an unsupported
 * keylen, or CRYPT_INVALID_ROUNDS when rounds is neither 0 nor the implied
 * round count.
 */
int rijndael_setup(const unsigned char *key, int keylen, int rounds, symmetric_key *skey)
{
    int i, w, nr, nwords;
    unsigned long temp, *rk, *rrk;

    _ARGCHK(key != NULL);
    _ARGCHK(skey != NULL);

    if (keylen != 16 && keylen != 24 && keylen != 32) {
        return CRYPT_INVALID_KEYSIZE;
    }

    /* 16 -> 10 rounds, 24 -> 12 rounds, 32 -> 14 rounds */
    nr = 10 + ((keylen/8)-2)*2;
    if (rounds != 0 && rounds != nr) {
        return CRYPT_INVALID_ROUNDS;
    }
    skey->rijndael.Nr = nr;

    /* setup the forward key: the first four words come straight from the key */
    rk = skey->rijndael.eK;
    LOAD32H(rk[0], key);
    LOAD32H(rk[1], key + 4);
    LOAD32H(rk[2], key + 8);
    LOAD32H(rk[3], key + 12);

    switch (keylen) {
    case 16:
        nwords = 44;
        /* ten expansion steps, each producing four new words */
        for (i = 0; i < 10; i++, rk += 4) {
            temp  = rk[3];
            rk[4] = rk[0] ^
                    (Te4_3[byte(temp, 2)]) ^
                    (Te4_2[byte(temp, 1)]) ^
                    (Te4_1[byte(temp, 0)]) ^
                    (Te4_0[byte(temp, 3)]) ^
                    rcon[i];
            rk[5] = rk[1] ^ rk[4];
            rk[6] = rk[2] ^ rk[5];
            rk[7] = rk[3] ^ rk[6];
        }
        break;

    case 24:
        nwords = 52;
        LOAD32H(rk[4], key + 16);
        LOAD32H(rk[5], key + 20);
        /* eight expansion steps of six words; the last step stops after four */
        for (i = 0; i < 8; i++, rk += 6) {
#ifdef _MSC_VER
            /* index from the array base instead of through rk when built with MSVC */
            temp = skey->rijndael.eK[rk - skey->rijndael.eK + 5];
#else
            temp = rk[5];
#endif
            rk[ 6] = rk[ 0] ^
                     (Te4_3[byte(temp, 2)]) ^
                     (Te4_2[byte(temp, 1)]) ^
                     (Te4_1[byte(temp, 0)]) ^
                     (Te4_0[byte(temp, 3)]) ^
                     rcon[i];
            rk[ 7] = rk[ 1] ^ rk[ 6];
            rk[ 8] = rk[ 2] ^ rk[ 7];
            rk[ 9] = rk[ 3] ^ rk[ 8];
            if (i < 7) {
                rk[10] = rk[ 4] ^ rk[ 9];
                rk[11] = rk[ 5] ^ rk[10];
            }
        }
        break;

    case 32:
        nwords = 60;
        LOAD32H(rk[4], key + 16);
        LOAD32H(rk[5], key + 20);
        LOAD32H(rk[6], key + 24);
        LOAD32H(rk[7], key + 28);
        /* seven expansion steps of eight words; the last step stops after four */
        for (i = 0; i < 7; i++, rk += 8) {
#ifdef _MSC_VER
            /* index from the array base instead of through rk when built with MSVC */
            temp = skey->rijndael.eK[rk - skey->rijndael.eK + 7];
#else
            temp = rk[7];
#endif
            rk[ 8] = rk[ 0] ^
                     (Te4_3[byte(temp, 2)]) ^
                     (Te4_2[byte(temp, 1)]) ^
                     (Te4_1[byte(temp, 0)]) ^
                     (Te4_0[byte(temp, 3)]) ^
                     rcon[i];
            rk[ 9] = rk[ 1] ^ rk[ 8];
            rk[10] = rk[ 2] ^ rk[ 9];
            rk[11] = rk[ 3] ^ rk[10];
            if (i < 6) {
                /* second half of the step: same table lookups, but without the
                 * byte rotation and without a round constant */
                temp   = rk[11];
                rk[12] = rk[ 4] ^
                         (Te4_3[byte(temp, 3)]) ^
                         (Te4_2[byte(temp, 2)]) ^
                         (Te4_1[byte(temp, 1)]) ^
                         (Te4_0[byte(temp, 0)]);
                rk[13] = rk[ 5] ^ rk[12];
                rk[14] = rk[ 6] ^ rk[13];
                rk[15] = rk[ 7] ^ rk[14];
            }
        }
        break;

    default:
        /* Unreachable after the keylen check above.  Fail closed rather than
         * continue with a schedule length that would make the inverse-key
         * loop below walk off the front of eK. */
        return CRYPT_INVALID_KEYSIZE;
    }

    /* setup the inverse key now: dK is eK taken four words at a time in
     * reverse order, with the inverse MixColumn transform applied to all
     * round keys but the first and the last */
    rk  = skey->rijndael.dK;
    rrk = skey->rijndael.eK + nwords - 4;

    /* copy first */
    for (w = 0; w < 4; w++) {
        rk[w] = rrk[w];
    }

    for (i = 1; i < nr; i++) {
        rrk -= 4;
        rk  += 4;
        for (w = 0; w < 4; w++) {
            temp = rrk[w];
#ifdef SMALL_CODE
            rk[w] =
                Td0[255 & Te4[byte(temp, 3)]] ^
                Td1[255 & Te4[byte(temp, 2)]] ^
                Td2[255 & Te4[byte(temp, 1)]] ^
                Td3[255 & Te4[byte(temp, 0)]];
#else
            rk[w] =
                Tks0[byte(temp, 3)] ^
                Tks1[byte(temp, 2)] ^
                Tks2[byte(temp, 1)] ^
                Tks3[byte(temp, 0)];
#endif
        }
    }

    /* copy last */
    rrk -= 4;
    rk  += 4;
    for (w = 0; w < 4; w++) {
        rk[w] = rrk[w];
    }

    return CRYPT_OK;
}
/* One output word of a full encryption round: one lookup in each of
 * Te0..Te3 on bytes 3,2,1,0 of words a,b,c,d, XORed with round-key word k. */
#define AES_ENC_COLUMN(a, b, c, d, k) \
    (Te0[byte(a, 3)] ^ Te1[byte(b, 2)] ^ Te2[byte(c, 1)] ^ Te3[byte(d, 0)] ^ (k))

/* One output word of the last encryption round, built from the Te4_x tables. */
#define AES_ENC_FINAL(a, b, c, d, k)      \
    ((Te4_3[((a) >> 24)       ]) ^        \
     (Te4_2[((b) >> 16) & 0xff]) ^        \
     (Te4_1[((c) >>  8) & 0xff]) ^        \
     (Te4_0[((d)      ) & 0xff]) ^        \
     (k))

/* Encrypt one 16-byte block.
 *
 *   pt   - 16 bytes of input
 *   ct   - receives 16 bytes of output
 *   skey - key schedule; Nr and eK of skey->rijndael are read
 *
 * All of pt is loaded before anything is stored to ct.
 */
void rijndael_ecb_encrypt(const unsigned char *pt, unsigned char *ct, symmetric_key *skey)
{
    unsigned long s0, s1, s2, s3, t0, t1, t2, t3, *rk;
    int pairs;

    _ARGCHK(pt != NULL);
    _ARGCHK(ct != NULL);
    _ARGCHK(skey != NULL);

    rk = skey->rijndael.eK;

    /* map byte array block to cipher state and add initial round key */
    LOAD32H(s0, pt);      s0 ^= rk[0];
    LOAD32H(s1, pt + 4);  s1 ^= rk[1];
    LOAD32H(s2, pt + 8);  s2 ^= rk[2];
    LOAD32H(s3, pt + 12); s3 ^= rk[3];

    /* Nr - 1 full rounds, processed two per pass: s -> t with rk[4..7],
     * then t -> s with the next rk[0..3].  The loop exits after the first
     * half of the final pass, leaving the state in t0..t3. */
    pairs = skey->rijndael.Nr >> 1;
    for (;;) {
#if defined(__GNUC__)
        /* Same result as the #else branch; this statement order was chosen
         * by the original author as friendlier to x86 processors. */
        t0 = rk[4];
        t1 = rk[5];
        t2 = rk[6];
        t3 = rk[7];

        t1 ^= Te3[byte(s0, 0)];
        t2 ^= Te2[byte(s0, 1)];
        t3 ^= Te1[byte(s0, 2)];
        t0 ^= Te0[byte(s0, 3)];

        t2 ^= Te3[byte(s1, 0)];
        t3 ^= Te2[byte(s1, 1)];
        t0 ^= Te1[byte(s1, 2)];
        t1 ^= Te0[byte(s1, 3)];

        t3 ^= Te3[byte(s2, 0)];
        t0 ^= Te2[byte(s2, 1)];
        t1 ^= Te1[byte(s2, 2)];
        t2 ^= Te0[byte(s2, 3)];

        t0 ^= Te3[byte(s3, 0)];
        t1 ^= Te2[byte(s3, 1)];
        t2 ^= Te1[byte(s3, 2)];
        t3 ^= Te0[byte(s3, 3)];
#else
        t0 = AES_ENC_COLUMN(s0, s1, s2, s3, rk[4]);
        t1 = AES_ENC_COLUMN(s1, s2, s3, s0, rk[5]);
        t2 = AES_ENC_COLUMN(s2, s3, s0, s1, rk[6]);
        t3 = AES_ENC_COLUMN(s3, s0, s1, s2, rk[7]);
#endif

        rk += 8;
        if (--pairs == 0) {
            break;
        }

        /* The accumulate-style ordering is deliberately not used for this
         * half; the original author noted it was slower on the Athlon. */
        s0 = AES_ENC_COLUMN(t0, t1, t2, t3, rk[0]);
        s1 = AES_ENC_COLUMN(t1, t2, t3, t0, rk[1]);
        s2 = AES_ENC_COLUMN(t2, t3, t0, t1, rk[2]);
        s3 = AES_ENC_COLUMN(t3, t0, t1, t2, rk[3]);
    }

    /* apply last round and map cipher state to byte array block */
    s0 = AES_ENC_FINAL(t0, t1, t2, t3, rk[0]);
    STORE32H(s0, ct);
    s1 = AES_ENC_FINAL(t1, t2, t3, t0, rk[1]);
    STORE32H(s1, ct+4);
    s2 = AES_ENC_FINAL(t2, t3, t0, t1, rk[2]);
    STORE32H(s2, ct+8);
    s3 = AES_ENC_FINAL(t3, t0, t1, t2, rk[3]);
    STORE32H(s3, ct+12);
}

#undef AES_ENC_FINAL
#undef AES_ENC_COLUMN
/* One output word of a full decryption round: one lookup in each of
 * Td0..Td3 on bytes 3,2,1,0 of words a,b,c,d, XORed with round-key word k. */
#define AES_DEC_COLUMN(a, b, c, d, k) \
    (Td0[byte(a, 3)] ^ Td1[byte(b, 2)] ^ Td2[byte(c, 1)] ^ Td3[byte(d, 0)] ^ (k))

/* One output word of the last decryption round: four Td4 lookups, each
 * masked down to the byte lane it contributes. */
#define AES_DEC_FINAL(a, b, c, d, k)                  \
    ((Td4[((a) >> 24)       ] & 0xff000000) ^         \
     (Td4[((b) >> 16) & 0xff] & 0x00ff0000) ^         \
     (Td4[((c) >>  8) & 0xff] & 0x0000ff00) ^         \
     (Td4[((d)      ) & 0xff] & 0x000000ff) ^         \
     (k))

/* Decrypt one 16-byte block.
 *
 *   ct   - 16 bytes of input
 *   pt   - receives 16 bytes of output
 *   skey - key schedule; Nr and dK of skey->rijndael are read
 *
 * All of ct is loaded before anything is stored to pt.
 */
void rijndael_ecb_decrypt(const unsigned char *ct, unsigned char *pt, symmetric_key *skey)
{
    unsigned long s0, s1, s2, s3, t0, t1, t2, t3, *rk;
    int pairs;

    _ARGCHK(pt != NULL);
    _ARGCHK(ct != NULL);
    _ARGCHK(skey != NULL);

    rk = skey->rijndael.dK;

    /* map byte array block to cipher state and add initial round key */
    LOAD32H(s0, ct);      s0 ^= rk[0];
    LOAD32H(s1, ct + 4);  s1 ^= rk[1];
    LOAD32H(s2, ct + 8);  s2 ^= rk[2];
    LOAD32H(s3, ct + 12); s3 ^= rk[3];

    /* Nr - 1 full rounds, processed two per pass: s -> t with rk[4..7],
     * then t -> s with the next rk[0..3].  The loop exits after the first
     * half of the final pass, leaving the state in t0..t3. */
    pairs = skey->rijndael.Nr >> 1;
    for (;;) {
        t0 = AES_DEC_COLUMN(s0, s3, s2, s1, rk[4]);
        t1 = AES_DEC_COLUMN(s1, s0, s3, s2, rk[5]);
        t2 = AES_DEC_COLUMN(s2, s1, s0, s3, rk[6]);
        t3 = AES_DEC_COLUMN(s3, s2, s1, s0, rk[7]);

        rk += 8;
        if (--pairs == 0) {
            break;
        }

        s0 = AES_DEC_COLUMN(t0, t3, t2, t1, rk[0]);
        s1 = AES_DEC_COLUMN(t1, t0, t3, t2, rk[1]);
        s2 = AES_DEC_COLUMN(t2, t1, t0, t3, rk[2]);
        s3 = AES_DEC_COLUMN(t3, t2, t1, t0, rk[3]);
    }

    /* apply last round and map cipher state to byte array block */
    s0 = AES_DEC_FINAL(t0, t3, t2, t1, rk[0]);
    STORE32H(s0, pt);
    s1 = AES_DEC_FINAL(t1, t0, t3, t2, rk[1]);
    STORE32H(s1, pt+4);
    s2 = AES_DEC_FINAL(t2, t1, t0, t3, rk[2]);
    STORE32H(s2, pt+8);
    s3 = AES_DEC_FINAL(t3, t2, t1, t0, rk[3]);
    STORE32H(s3, pt+12);
}

#undef AES_DEC_FINAL
#undef AES_DEC_COLUMN
/* Self-test: for one 16-, one 24- and one 32-byte key, schedule the key,
 * encrypt a fixed plaintext, decrypt the result, and compare both outputs
 * with the stored vectors.
 *
 * Returns CRYPT_NOP when built without LTC_TEST; otherwise CRYPT_OK when
 * every vector matches, the error from rijndael_setup if key setup fails,
 * or CRYPT_FAIL_TESTVECTOR on the first mismatch.
 */
int rijndael_test(void)
{
#ifndef LTC_TEST
    return CRYPT_NOP;
#else
    /* NOTE(review): these look like the FIPS-197 Appendix C example
     * vectors -- confirm against the standard. */
    static const struct {
        int keylen;
        unsigned char key[32], pt[16], ct[16];
    } tests[] = {
        { 16,
          { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
            0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
          { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
            0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
          { 0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30,
            0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a }
        }, {
          24,
          { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
            0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
            0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 },
          { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
            0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
          { 0xdd, 0xa9, 0x7c, 0xa4, 0x86, 0x4c, 0xdf, 0xe0,
            0x6e, 0xaf, 0x70, 0xa0, 0xec, 0x0d, 0x71, 0x91 }
        }, {
          32,
          { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
            0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
            0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
            0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
          { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
            0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
          { 0x8e, 0xa2, 0xb7, 0xca, 0x51, 0x67, 0x45, 0xbf,
            0xea, 0xfc, 0x49, 0x90, 0x4b, 0x49, 0x60, 0x89 }
        }
    };
    symmetric_key key;
    unsigned char enc_out[16], dec_out[16];
    int idx, err;

    for (idx = 0; idx < (int)(sizeof(tests)/sizeof(tests[0])); idx++) {
        /* start every vector from a clean schedule */
        zeromem(&key, sizeof(key));

        err = rijndael_setup(tests[idx].key, tests[idx].keylen, 0, &key);
        if (err != CRYPT_OK) {
            return err;
        }

        rijndael_ecb_encrypt(tests[idx].pt, enc_out, &key);
        rijndael_ecb_decrypt(enc_out, dec_out, &key);

        /* the ciphertext must match the stored vector ... */
        if (memcmp(enc_out, tests[idx].ct, 16) != 0) {
            return CRYPT_FAIL_TESTVECTOR;
        }
        /* ... and decrypting it must give back the plaintext */
        if (memcmp(dec_out, tests[idx].pt, 16) != 0) {
            return CRYPT_FAIL_TESTVECTOR;
        }
    }
    return CRYPT_OK;
#endif
}
/* Round a requested key size down to the nearest size this cipher accepts.
 *
 *   desired_keysize - in: requested key length in bytes;
 *                     out: 16, 24 or 32, whichever is the largest value
 *                     not exceeding the request (requests of 32 or more
 *                     yield 32)
 *
 * Returns CRYPT_OK on success, or CRYPT_INVALID_KEYSIZE (leaving
 * *desired_keysize untouched) when the request is below 16.
 */
int rijndael_keysize(int *desired_keysize)
{
    int requested;

    _ARGCHK(desired_keysize != NULL);

    requested = *desired_keysize;
    if (requested < 16) {
        return CRYPT_INVALID_KEYSIZE;
    }

    if (requested >= 32) {
        *desired_keysize = 32;
    } else if (requested >= 24) {
        *desired_keysize = 24;
    } else {
        *desired_keysize = 16;
    }
    return CRYPT_OK;
}
#endif