377 lines
11 KiB
ArmAsm
377 lines
11 KiB
ArmAsm
|
// -------------------------------------------------------------------------
|
||
|
// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
|
||
|
// All rights reserved.
|
||
|
//
|
||
|
// LICENSE TERMS
|
||
|
//
|
||
|
// The free distribution and use of this software in both source and binary
|
||
|
// form is allowed (with or without changes) provided that:
|
||
|
//
|
||
|
// 1. distributions of this source code include the above copyright
|
||
|
// notice, this list of conditions and the following disclaimer;
|
||
|
//
|
||
|
// 2. distributions in binary form include the above copyright
|
||
|
// notice, this list of conditions and the following disclaimer
|
||
|
// in the documentation and/or other associated materials;
|
||
|
//
|
||
|
// 3. the copyright holder's name is not used to endorse products
|
||
|
// built using this software without specific written permission.
|
||
|
//
|
||
|
//
|
||
|
// ALTERNATIVELY, provided that this notice is retained in full, this product
|
||
|
// may be distributed under the terms of the GNU General Public License (GPL),
|
||
|
// in which case the provisions of the GPL apply INSTEAD OF those given above.
|
||
|
//
|
||
|
// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
|
||
|
// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
|
||
|
|
||
|
// DISCLAIMER
|
||
|
//
|
||
|
// This software is provided 'as is' with no explicit or implied warranties
|
||
|
// in respect of its properties including, but not limited to, correctness
|
||
|
// and fitness for purpose.
|
||
|
// -------------------------------------------------------------------------
|
||
|
// Issue Date: 29/07/2002
|
||
|
|
||
|
.file "aes-i586-asm.S"
|
||
|
.text
|
||
|
|
||
|
// aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
|
||
|
// aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
|
||
|
|
||
|
// Size in bytes of each of the four 'xor' lookup tables (256 32-bit words).
#define tlen	1024

// Stack offsets of the C arguments, valid when exactly one register has
// been pushed after the return address. Code that has pushed more
// registers adds 4 per extra push (e.g. in_blk+4, out_blk+12).
#define in_blk	8	// address of the input byte array
#define out_blk	12	// address of the output byte array
#define ctx	16	// address of the AES context structure

// Byte offsets of the fields read from the AES context structure.
#define ekey	0	// base of the encryption key schedule
#define nrnd	256	// number of rounds
#define dkey	260	// base of the decryption key schedule
|
||
|
|
||
|
// Register mapping for the encrypt and decrypt subroutines. The round
// macros are written in terms of r0..r5 so that the assignment to real
// registers is made in exactly one place.
#define r0	eax
#define r1	ebx
#define r2	ecx
#define r3	edx
#define r4	esi
#define r5	edi

// Names of the low (bits 0..7) and high (bits 8..15) byte sub-registers,
// keyed by "<32-bit register name>l" / "<32-bit register name>h".
// Only eax, ebx, ecx and edx have entries here.
#define eaxl	al
#define eaxh	ah
#define ebxl	bl
#define ebxh	bh
#define ecxl	cl
#define ecxh	ch
#define edxl	dl
#define edxh	dh

// l(reg) / h(reg) give the low / high byte sub-register of reg.
// The two-level definition makes the preprocessor expand an alias such as
// r0 to eax *before* the l/h suffix is pasted on, so l(r0) -> eaxl -> al.
#define _l(reg)	reg##l
#define l(reg)	_l(reg)

#define _h(reg)	reg##h
#define h(reg)	_h(reg)
|
||
|
|
||
|
// do_col(table, a1,a2,a3,a4, idx, tmp)
//
// Takes the 32-bit word in idx, which represents one column, and uses each
// of its four bytes to index one of four consecutive tables of 256 32-bit
// words. The four values obtained are xored into the output registers.
//
// Parameters:
//   table  base address of the first table; tables 1..3 follow at
//          table+tlen, table+2*tlen and table+3*tlen
//   a1     register xored with table 0 entry for bits  0..7  of idx
//   a2     register xored with table 1 entry for bits  8..15 of idx
//   a3     register xored with table 2 entry for bits 16..23 of idx
//   a4     register xored with table 3 entry for bits 24..31 of idx
//   idx    input register for the round (destroyed); it must be a register
//          for which l() and h() are defined
//   tmp    scratch register for the round (destroyed)

#define do_col(table, a1,a2,a3,a4, idx, tmp)		\
	movzbl	%l(idx), %tmp;				\
	xorl	table(,%tmp,4), %a1;			\
	movzbl	%h(idx), %tmp;				\
	shrl	$16, %idx;				\
	xorl	table+tlen(,%tmp,4), %a2;		\
	movzbl	%l(idx), %tmp;				\
	movzbl	%h(idx), %idx;				\
	xorl	table+2*tlen(,%tmp,4), %a3;		\
	xorl	table+3*tlen(,%idx,4), %a4;
|
||
|
|
||
|
// do_fcol(table, a1,a2,a3,a4, idx, tmp, sched)
//
// Same table lookups as do_col, but the output registers are first
// initialised from the key schedule instead of being accumulated into:
//   a1 = word at sched+0  ^ table 0 entry for bits  0..7  of idx
//   a2 = word at sched+12 ^ table 1 entry for bits  8..15 of idx
//   a3 = word at sched+8  ^ table 2 entry for bits 16..23 of idx
//   a4 = word at sched+4  ^ table 3 entry for bits 24..31 of idx
//
// sched is the text of a memory operand such as -64(%ebp) or (%ebp); it is
// written directly after a literal offset ("12 sched") so that the
// assembler adds the two displacements together.
//
// NB1: the original value of a3 is in idx on exit
// NB2: the original values of a1, a2 and a4 aren't used
// tmp is a scratch register and is destroyed.
#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched)	\
	movl	0 sched, %a1;				\
	movzbl	%l(idx), %tmp;				\
	movl	12 sched, %a2;				\
	xorl	table(,%tmp,4), %a1;			\
	movl	4 sched, %a4;				\
	movzbl	%h(idx), %tmp;				\
	shrl	$16, %idx;				\
	xorl	table+tlen(,%tmp,4), %a2;		\
	movzbl	%l(idx), %tmp;				\
	movzbl	%h(idx), %idx;				\
	xorl	table+3*tlen(,%idx,4), %a4;		\
	movl	%a3, %idx;				\
	movl	8 sched, %a3;				\
	xorl	table+2*tlen(,%tmp,4), %a3;
|
||
|
|
||
|
// do_icol(table, a1,a2,a3,a4, idx, tmp, sched)
//
// Counterpart of do_fcol used by the inverse rounds. The table lookups are
// the same; only the key schedule words assigned to a2 and a4 differ:
//   a1 = word at sched+0  ^ table 0 entry for bits  0..7  of idx
//   a2 = word at sched+4  ^ table 1 entry for bits  8..15 of idx
//   a3 = word at sched+8  ^ table 2 entry for bits 16..23 of idx
//   a4 = word at sched+12 ^ table 3 entry for bits 24..31 of idx
//
// sched is the text of a memory operand such as +64(%ebp) or (%ebp); it is
// written directly after a literal offset ("12 sched") so that the
// assembler adds the two displacements together.
//
// NB1: the original value of a3 is in idx on exit
// NB2: the original values of a1, a2 and a4 aren't used
// tmp is a scratch register and is destroyed.
#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched)	\
	movl	0 sched, %a1;				\
	movzbl	%l(idx), %tmp;				\
	movl	4 sched, %a2;				\
	xorl	table(,%tmp,4), %a1;			\
	movl	12 sched, %a4;				\
	movzbl	%h(idx), %tmp;				\
	shrl	$16, %idx;				\
	xorl	table+tlen(,%tmp,4), %a2;		\
	movzbl	%l(idx), %tmp;				\
	movzbl	%h(idx), %idx;				\
	xorl	table+3*tlen(,%idx,4), %a4;		\
	movl	%a3, %idx;				\
	movl	8 sched, %a3;				\
	xorl	table+2*tlen(,%tmp,4), %a3;
|
||
|
|
||
|
|
||
|
// save(a1, a2): store register a2 in the 32-bit stack slot numbered a1,
// i.e. at 4*a1(%esp).
// (The original Gladman code had conditional saves to MMX registers.)
#define save(a1, a2)		\
	movl	%a2, 4*a1(%esp)
|
||
|
|
||
|
// restore(a1, a2): load register a1 from the 32-bit stack slot numbered a2,
// i.e. from 4*a2(%esp). Note the argument order is the reverse of save().
#define restore(a1, a2)		\
	movl	4*a2(%esp), %a1
|
||
|
|
||
|
// These macros each perform one forward encryption round. A
// fwd_rnd1/fwd_rnd2 pair is entered with the previous round's column
// values in r0,r1,r4,r5 and exits with the final values in the same
// registers, using the stack for temporary storage.
|
||
|
|
||
|
// fwd_rnd1(arg, table): one forward round.
//   arg    memory operand addressing the 16-byte round key
//   table  base address of the four lookup tables
//
// round column values
//   on entry: r0,r1,r4,r5
//   on exit:  r2,r1,r4,r5
// r0 and r3 are used as index and scratch registers and are destroyed;
// stack slots 0 and 1 (at 0(%esp) and 4(%esp)) are overwritten.
#define fwd_rnd1(arg, table)							\
	save   (0,r1);			/* slot 0 = old r1 */			\
	save   (1,r5);			/* slot 1 = old r5 */			\
										\
	/* load the round key and fold in each old column in turn */		\
	do_fcol(table, r2,r5,r4,r1, r0,r3, arg);  /* uses old r0; r0 = old r4 */ \
	do_col (table, r4,r1,r2,r5, r0,r3);	  /* uses old r4 */		\
	restore(r0,0);								\
	do_col (table, r1,r2,r5,r4, r0,r3);	  /* uses old r1 */		\
	restore(r0,1);								\
	do_col (table, r5,r4,r1,r2, r0,r3);	  /* uses old r5 */
|
||
|
|
||
|
// fwd_rnd2(arg, table): one forward round, register-renamed counterpart of
// fwd_rnd1 so that the two can be alternated without moving column 0.
//   arg    memory operand addressing the 16-byte round key
//   table  base address of the four lookup tables
//
// round column values
//   on entry: r2,r1,r4,r5
//   on exit:  r0,r1,r4,r5
// r2 and r3 are used as index and scratch registers and are destroyed;
// stack slots 0 and 1 (at 0(%esp) and 4(%esp)) are overwritten.
#define fwd_rnd2(arg, table)							\
	save   (0,r1);			/* slot 0 = old r1 */			\
	save   (1,r5);			/* slot 1 = old r5 */			\
										\
	/* load the round key and fold in each old column in turn */		\
	do_fcol(table, r0,r5,r4,r1, r2,r3, arg);  /* uses old r2; r2 = old r4 */ \
	do_col (table, r4,r1,r0,r5, r2,r3);	  /* uses old r4 */		\
	restore(r2,0);								\
	do_col (table, r1,r0,r5,r4, r2,r3);	  /* uses old r1 */		\
	restore(r2,1);								\
	do_col (table, r5,r4,r1,r0, r2,r3);	  /* uses old r5 */
|
||
|
|
||
|
// These macros each perform one inverse encryption round. An
// inv_rnd1/inv_rnd2 pair is entered with the previous round's column
// values in r0,r1,r4,r5 and exits with the final values in the same
// registers, using the stack for temporary storage.
|
||
|
|
||
|
// inv_rnd1(arg, table): one inverse round.
//   arg    memory operand addressing the 16-byte round key
//   table  base address of the four lookup tables
//
// round column values
//   on entry: r0,r1,r4,r5
//   on exit:  r2,r1,r4,r5
// r0 and r3 are used as index and scratch registers and are destroyed;
// stack slots 0 and 1 (at 0(%esp) and 4(%esp)) are overwritten.
#define inv_rnd1(arg, table)							\
	save   (0,r1);			/* slot 0 = old r1 */			\
	save   (1,r5);			/* slot 1 = old r5 */			\
										\
	/* load the round key and fold in each old column in turn */		\
	do_icol(table, r2,r1,r4,r5, r0,r3, arg);  /* uses old r0; r0 = old r4 */ \
	do_col (table, r4,r5,r2,r1, r0,r3);	  /* uses old r4 */		\
	restore(r0,0);								\
	do_col (table, r1,r4,r5,r2, r0,r3);	  /* uses old r1 */		\
	restore(r0,1);								\
	do_col (table, r5,r2,r1,r4, r0,r3);	  /* uses old r5 */
|
||
|
|
||
|
// inv_rnd2(arg, table): one inverse round, register-renamed counterpart of
// inv_rnd1 so that the two can be alternated without moving column 0.
//   arg    memory operand addressing the 16-byte round key
//   table  base address of the four lookup tables
//
// round column values
//   on entry: r2,r1,r4,r5
//   on exit:  r0,r1,r4,r5
// r2 and r3 are used as index and scratch registers and are destroyed;
// stack slots 0 and 1 (at 0(%esp) and 4(%esp)) are overwritten.
#define inv_rnd2(arg, table)							\
	save   (0,r1);			/* slot 0 = old r1 */			\
	save   (1,r5);			/* slot 1 = old r5 */			\
										\
	/* load the round key and fold in each old column in turn */		\
	do_icol(table, r0,r1,r4,r5, r2,r3, arg);  /* uses old r2; r2 = old r4 */ \
	do_col (table, r4,r5,r0,r1, r2,r3);	  /* uses old r4 */		\
	restore(r2,0);								\
	do_col (table, r1,r4,r5,r0, r2,r3);	  /* uses old r1 */		\
	restore(r2,1);								\
	do_col (table, r5,r0,r1,r4, r2,r3);	  /* uses old r5 */
|
||
|
|
||
|
// AES (Rijndael) Encryption Subroutine
//
// aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
//
// In:    arguments are read from the stack (in_blk, out_blk, ctx offsets)
// Out:   16 bytes written to out_blk; %eax = 1
// Saved: %ebp, %ebx, %esi, %edi are pushed on entry and popped on exit
// Clobb: %ecx, %edx, flags
// Stack: four saved registers plus 8 bytes of scratch for the round macros

	.global	aes_enc_blk

	.extern	ft_tab
	.extern	fl_tab

	.align	4

aes_enc_blk:
	pushl	%ebp
	movl	ctx(%esp), %ebp		// ebp = pointer to context

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

1:	pushl	%ebx
	movl	in_blk+4(%esp), %r2	// +4: a second register is now pushed
	pushl	%esi
	movl	nrnd(%ebp), %r3		// r3 = number of rounds
	pushl	%edi
#if ekey != 0
	leal	ekey(%ebp), %ebp	// ebp = key pointer
#endif

// input four columns and xor in first round key

	movl	(%r2), %r0
	movl	4(%r2), %r1
	movl	8(%r2), %r4
	movl	12(%r2), %r5
	xorl	(%ebp), %r0
	xorl	4(%ebp), %r1
	xorl	8(%ebp), %r4
	xorl	12(%ebp), %r5

	subl	$8, %esp		// space for register saves on stack
	addl	$16, %ebp		// advance to next round key
	subl	$10, %r3
	je	4f			// 10 rounds for 128-bit key
	addl	$32, %ebp		// two extra round keys precede label 4
	subl	$2, %r3
	je	3f			// 12 rounds for 192-bit key
	addl	$32, %ebp		// two more for the 14-round case

2:	fwd_rnd1( -64(%ebp) ,ft_tab)	// 14 rounds for 256-bit key
	fwd_rnd2( -48(%ebp) ,ft_tab)
3:	fwd_rnd1( -32(%ebp) ,ft_tab)	// 12 rounds for 192-bit key
	fwd_rnd2( -16(%ebp) ,ft_tab)
4:	fwd_rnd1(    (%ebp) ,ft_tab)	// 10 rounds for 128-bit key
	fwd_rnd2( +16(%ebp) ,ft_tab)
	fwd_rnd1( +32(%ebp) ,ft_tab)
	fwd_rnd2( +48(%ebp) ,ft_tab)
	fwd_rnd1( +64(%ebp) ,ft_tab)
	fwd_rnd2( +80(%ebp) ,ft_tab)
	fwd_rnd1( +96(%ebp) ,ft_tab)
	fwd_rnd2(+112(%ebp) ,ft_tab)
	fwd_rnd1(+128(%ebp) ,ft_tab)
	fwd_rnd2(+144(%ebp) ,fl_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings

	addl	$8, %esp		// release the register save space
	movl	out_blk+12(%esp), %ebp	// +12: four registers are still pushed
	movl	%r5, 12(%ebp)
	popl	%edi
	movl	%r4, 8(%ebp)
	popl	%esi
	movl	%r1, 4(%ebp)
	popl	%ebx
	movl	%r0, (%ebp)
	popl	%ebp
	movl	$1, %eax		// return value
	ret
|
||
|
|
||
|
// AES (Rijndael) Decryption Subroutine
//
// aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
//
// In:    arguments are read from the stack (in_blk, out_blk, ctx offsets)
// Out:   16 bytes written to out_blk; %eax = 1
// Saved: %ebp, %ebx, %esi, %edi are pushed on entry and popped on exit
// Clobb: %ecx, %edx, flags
// Stack: four saved registers plus 8 bytes of scratch for the round macros
//
// The decryption key schedule is walked from its last round key down to
// its first, so every key pointer adjustment below is a subtraction.

	.global	aes_dec_blk

	.extern	it_tab
	.extern	il_tab

	.align	4

aes_dec_blk:
	pushl	%ebp
	movl	ctx(%esp), %ebp		// ebp = pointer to context

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

1:	pushl	%ebx
	movl	in_blk+4(%esp), %r2	// +4: a second register is now pushed
	pushl	%esi
	movl	nrnd(%ebp), %r3		// r3 = number of rounds
	pushl	%edi
#if dkey != 0
	leal	dkey(%ebp), %ebp	// ebp = key pointer
#endif
	movl	%r3, %r0
	shll	$4, %r0			// r0 = 16 * number of rounds
	addl	%r0, %ebp		// ebp = key pointer + 16 * rounds

// input four columns and xor in first round key

	movl	(%r2), %r0
	movl	4(%r2), %r1
	movl	8(%r2), %r4
	movl	12(%r2), %r5
	xorl	(%ebp), %r0
	xorl	4(%ebp), %r1
	xorl	8(%ebp), %r4
	xorl	12(%ebp), %r5

	subl	$8, %esp		// space for register saves on stack
	subl	$16, %ebp		// step back to next round key
	subl	$10, %r3
	je	4f			// 10 rounds for 128-bit key
	subl	$32, %ebp		// two extra round keys precede label 4
	subl	$2, %r3
	je	3f			// 12 rounds for 192-bit key
	subl	$32, %ebp		// two more for the 14-round case

2:	inv_rnd1( +64(%ebp), it_tab)	// 14 rounds for 256-bit key
	inv_rnd2( +48(%ebp), it_tab)
3:	inv_rnd1( +32(%ebp), it_tab)	// 12 rounds for 192-bit key
	inv_rnd2( +16(%ebp), it_tab)
4:	inv_rnd1(    (%ebp), it_tab)	// 10 rounds for 128-bit key
	inv_rnd2( -16(%ebp), it_tab)
	inv_rnd1( -32(%ebp), it_tab)
	inv_rnd2( -48(%ebp), it_tab)
	inv_rnd1( -64(%ebp), it_tab)
	inv_rnd2( -80(%ebp), it_tab)
	inv_rnd1( -96(%ebp), it_tab)
	inv_rnd2(-112(%ebp), it_tab)
	inv_rnd1(-128(%ebp), it_tab)
	inv_rnd2(-144(%ebp), il_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings

	addl	$8, %esp		// release the register save space
	movl	out_blk+12(%esp), %ebp	// +12: four registers are still pushed
	movl	%r5, 12(%ebp)
	popl	%edi
	movl	%r4, 8(%ebp)
	popl	%esi
	movl	%r1, 4(%ebp)
	popl	%ebx
	movl	%r0, (%ebp)
	popl	%ebp
	movl	$1, %eax		// return value
	ret
|
||
|
|