Home | History | Annotate | Download | only in smp
      1 /*
      2  ---------------------------------------------------------------------------
      3  Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
      4 
      5  LICENSE TERMS
      6 
      7  The redistribution and use of this software (with or without changes)
      8  is allowed without the payment of fees or royalties provided that:
      9 
     10   1. source code distributions include the above copyright notice, this
     11      list of conditions and the following disclaimer;
     12 
     13   2. binary distributions include the above copyright notice, this list
     14      of conditions and the following disclaimer in their documentation;
     15 
     16   3. the name of the copyright holder is not used to endorse products
     17      built using this software without specific written permission.
     18 
     19  DISCLAIMER
     20 
     21  This software is provided 'as is' with no explicit or implied warranties
     22  in respect of its properties, including, but not limited to, correctness
     23  and/or fitness for purpose.
     24  ---------------------------------------------------------------------------
     25  Issue 09/09/2006
     26 
     27  This is an AES implementation that uses only 8-bit byte operations on the
     28  cipher state (there are options to use 32-bit types if available).
     29 
     30  The combination of mix columns and byte substitution used here is based on
     31  that developed by Karl Malbrain. His contribution is acknowledged.
     32  */
     33 
     34 /* define if you have a fast memcpy function on your system */
     35 #if 1
     36 #  define HAVE_MEMCPY
     37 #  include <string.h>
     38 #if 0
     39 #  if defined( _MSC_VER )
     40 #    include <intrin.h>
     41 #    pragma intrinsic( memcpy )
     42 #  endif
     43 #endif
     44 #endif
     45 
     46 #include <stdlib.h>
     47 
     48 /* add the target configuration to allow using internal data types and compilation options */
     49 #include "bt_target.h"
     50 
     51 /* define if you have fast 32-bit types on your system */
     52 #if 1
     53 #  define HAVE_UINT_32T
     54 #endif
     55 
     56 /* define if you don't want any tables */
     57 #if 1
     58 #  define USE_TABLES
     59 #endif
     60 
     61 /*  On Intel Core 2 duo VERSION_1 is faster */
     62 
     63 /* alternative versions (test for performance on your system) */
     64 #if 1
     65 #  define VERSION_1
     66 #endif
     67 
     68 #include "aes.h"
     69 
     70 #if defined( HAVE_UINT_32T )
     71   typedef UINT32 uint_32t;
     72 #endif
     73 
     74 /* functions for finite field multiplication in the AES Galois field    */
     75 
     76 #define WPOLY   0x011b
     77 #define BPOLY     0x1b
     78 #define DPOLY   0x008d
     79 
     80 #define f1(x)   (x)
     81 #define f2(x)   ((x << 1) ^ (((x >> 7) & 1) * WPOLY))
     82 #define f4(x)   ((x << 2) ^ (((x >> 6) & 1) * WPOLY) ^ (((x >> 6) & 2) * WPOLY))
     83 #define f8(x)   ((x << 3) ^ (((x >> 5) & 1) * WPOLY) ^ (((x >> 5) & 2) * WPOLY) \
     84                           ^ (((x >> 5) & 4) * WPOLY))
     85 #define d2(x)   (((x) >> 1) ^ ((x) & 1 ? DPOLY : 0))
     86 
     87 #define f3(x)   (f2(x) ^ x)
     88 #define f9(x)   (f8(x) ^ x)
     89 #define fb(x)   (f8(x) ^ f2(x) ^ x)
     90 #define fd(x)   (f8(x) ^ f4(x) ^ x)
     91 #define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
     92 
     93 #if defined( USE_TABLES )
     94 
     95 #define sb_data(w) {    /* S Box data values */                            \
     96     w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
     97     w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
     98     w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
     99     w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
    100     w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
    101     w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
    102     w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
    103     w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
    104     w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
    105     w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
    106     w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
    107     w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
    108     w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
    109     w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
    110     w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
    111     w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
    112     w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
    113     w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
    114     w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
    115     w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
    116     w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
    117     w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
    118     w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
    119     w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
    120     w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
    121     w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
    122     w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
    123     w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
    124     w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
    125     w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
    126     w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
    127     w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
    128 
    129 #define isb_data(w) {   /* inverse S Box data values */                    \
    130     w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\
    131     w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\
    132     w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\
    133     w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\
    134     w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\
    135     w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\
    136     w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\
    137     w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\
    138     w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\
    139     w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\
    140     w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\
    141     w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\
    142     w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\
    143     w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\
    144     w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\
    145     w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\
    146     w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\
    147     w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\
    148     w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\
    149     w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\
    150     w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\
    151     w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\
    152     w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\
    153     w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\
    154     w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\
    155     w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\
    156     w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\
    157     w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\
    158     w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\
    159     w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\
    160     w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\
    161     w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) }
    162 
    163 #define mm_data(w) {    /* basic data for forming finite field tables */   \
    164     w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\
    165     w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\
    166     w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\
    167     w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\
    168     w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\
    169     w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\
    170     w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\
    171     w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\
    172     w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\
    173     w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\
    174     w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\
    175     w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\
    176     w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\
    177     w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\
    178     w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\
    179     w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\
    180     w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\
    181     w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\
    182     w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\
    183     w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\
    184     w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\
    185     w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\
    186     w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\
    187     w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\
    188     w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\
    189     w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\
    190     w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\
    191     w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\
    192     w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\
    193     w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\
    194     w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\
    195     w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) }
    196 
/* The AES lookup tables, produced at compile time by instantiating the
   macro-based data above with the finite-field functions f1..fe.       */

static const uint_8t sbox[256]  =  sb_data(f1);     /* S-box                        */
static const uint_8t isbox[256] = isb_data(f1);     /* inverse S-box                */

static const uint_8t gfm2_sbox[256] = sb_data(f2);  /* 2 * S-box[x] in GF(2^8)      */
static const uint_8t gfm3_sbox[256] = sb_data(f3);  /* 3 * S-box[x] in GF(2^8)      */

static const uint_8t gfmul_9[256] = mm_data(f9);    /*  9 * x (inverse MixColumns)  */
static const uint_8t gfmul_b[256] = mm_data(fb);    /* 11 * x (inverse MixColumns)  */
static const uint_8t gfmul_d[256] = mm_data(fd);    /* 13 * x (inverse MixColumns)  */
static const uint_8t gfmul_e[256] = mm_data(fe);    /* 14 * x (inverse MixColumns)  */

/* table-driven forms of the field operations used by the cipher rounds */
#define s_box(x)     sbox[(x)]
#define is_box(x)    isbox[(x)]
#define gfm2_sb(x)   gfm2_sbox[(x)]
#define gfm3_sb(x)   gfm3_sbox[(x)]
#define gfm_9(x)     gfmul_9[(x)]
#define gfm_b(x)     gfmul_b[(x)]
#define gfm_d(x)     gfmul_d[(x)]
#define gfm_e(x)     gfmul_e[(x)]
    216 
    217 #else
    218 
    219 /* this is the high bit of x right shifted by 1 */
    220 /* position. Since the starting polynomial has  */
    221 /* 9 bits (0x11b), this right shift keeps the   */
    222 /* values of all top bits within a byte         */
    223 
    224 static uint_8t hibit(const uint_8t x)
    225 {   uint_8t r = (uint_8t)((x >> 1) | (x >> 2));
    226 
    227     r |= (r >> 2);
    228     r |= (r >> 4);
    229     return (r + 1) >> 1;
    230 }
    231 
    232 /* return the inverse of the finite field element x */
    233 
/* Computes the multiplicative inverse of x in GF(2^8) via a variant of
   the extended Euclidean algorithm.  Each quotient is a power of two
   (tracked through the leading-bit values n1/n2 from hibit), so the
   polynomial "divide" step is an integer divide of the leading bits and
   the "multiply back" step is an ordinary integer multiply.            */
static uint_8t gf_inv(const uint_8t x)
{   uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;

    if(x < 2)           /* 0 and 1 are their own inverses */
        return x;

    for( ; ; )
    {
        if(n1)
            while(n2 >= n1)             /* divide polynomial p2 by p1    */
            {
                n2 /= n1;               /* shift smaller polynomial left */
                p2 ^= (p1 * n2) & 0xff; /* and remove from larger one    */
                v2 ^= (v1 * n2);        /* shift accumulated value and   */
                n2 = hibit(p2);         /* add into result               */
            }
        else
            return v1;                  /* p1 has reached 1: v1 = x^-1   */

        if(n2)                          /* repeat with values swapped    */
            while(n1 >= n2)
            {
                n1 /= n2;
                p1 ^= p2 * n1;
                v1 ^= v2 * n1;
                n1 = hibit(p1);
            }
        else
            return v2;                  /* p2 has reached 1: v2 = x^-1   */
    }
}
    265 
    266 /* The forward and inverse affine transformations used in the S-box */
    267 uint_8t fwd_affine(const uint_8t x)
    268 {
    269 #if defined( HAVE_UINT_32T )
    270     uint_32t w = x;
    271     w ^= (w << 1) ^ (w << 2) ^ (w << 3) ^ (w << 4);
    272     return 0x63 ^ ((w ^ (w >> 8)) & 0xff);
    273 #else
    274     return 0x63 ^ x ^ (x << 1) ^ (x << 2) ^ (x << 3) ^ (x << 4)
    275                     ^ (x >> 7) ^ (x >> 6) ^ (x >> 5) ^ (x >> 4);
    276 #endif
    277 }
    278 
    279 uint_8t inv_affine(const uint_8t x)
    280 {
    281 #if defined( HAVE_UINT_32T )
    282     uint_32t w = x;
    283     w = (w << 1) ^ (w << 3) ^ (w << 6);
    284     return 0x05 ^ ((w ^ (w >> 8)) & 0xff);
    285 #else
    286     return 0x05 ^ (x << 1) ^ (x << 3) ^ (x << 6)
    287                 ^ (x >> 7) ^ (x >> 5) ^ (x >> 2);
    288 #endif
    289 }
    290 
    291 #define s_box(x)   fwd_affine(gf_inv(x))
    292 #define is_box(x)  gf_inv(inv_affine(x))
    293 #define gfm2_sb(x) f2(s_box(x))
    294 #define gfm3_sb(x) f3(s_box(x))
    295 #define gfm_9(x)   f9(x)
    296 #define gfm_b(x)   fb(x)
    297 #define gfm_d(x)   fd(x)
    298 #define gfm_e(x)   fe(x)
    299 
    300 #endif
    301 
    302 #if defined( HAVE_MEMCPY )
    303 #  define block_copy_nn(d, s, l)    memcpy(d, s, l)
    304 #  define block_copy(d, s)          memcpy(d, s, N_BLOCK)
    305 #else
    306 #  define block_copy_nn(d, s, l)    copy_block_nn(d, s, l)
    307 #  define block_copy(d, s)          copy_block(d, s)
    308 #endif
    309 
    310 #if !defined( HAVE_MEMCPY )
    311 static void copy_block( void *d, const void *s )
    312 {
    313 #if defined( HAVE_UINT_32T )
    314     ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0];
    315     ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1];
    316     ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2];
    317     ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3];
    318 #else
    319     ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0];
    320     ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1];
    321     ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2];
    322     ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3];
    323     ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4];
    324     ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5];
    325     ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6];
    326     ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7];
    327     ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8];
    328     ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9];
    329     ((uint_8t*)d)[10] = ((uint_8t*)s)[10];
    330     ((uint_8t*)d)[11] = ((uint_8t*)s)[11];
    331     ((uint_8t*)d)[12] = ((uint_8t*)s)[12];
    332     ((uint_8t*)d)[13] = ((uint_8t*)s)[13];
    333     ((uint_8t*)d)[14] = ((uint_8t*)s)[14];
    334     ((uint_8t*)d)[15] = ((uint_8t*)s)[15];
    335 #endif
    336 }
    337 
    338 static void copy_block_nn( void * d, const void *s, uint_8t nn )
    339 {
    340     while( nn-- )
    341         *((uint_8t*)d)++ = *((uint_8t*)s)++;
    342 }
    343 #endif
    344 
    345 static void xor_block( void *d, const void *s )
    346 {
    347 #if defined( HAVE_UINT_32T )
    348     ((uint_32t*)d)[ 0] ^= ((uint_32t*)s)[ 0];
    349     ((uint_32t*)d)[ 1] ^= ((uint_32t*)s)[ 1];
    350     ((uint_32t*)d)[ 2] ^= ((uint_32t*)s)[ 2];
    351     ((uint_32t*)d)[ 3] ^= ((uint_32t*)s)[ 3];
    352 #else
    353     ((uint_8t*)d)[ 0] ^= ((uint_8t*)s)[ 0];
    354     ((uint_8t*)d)[ 1] ^= ((uint_8t*)s)[ 1];
    355     ((uint_8t*)d)[ 2] ^= ((uint_8t*)s)[ 2];
    356     ((uint_8t*)d)[ 3] ^= ((uint_8t*)s)[ 3];
    357     ((uint_8t*)d)[ 4] ^= ((uint_8t*)s)[ 4];
    358     ((uint_8t*)d)[ 5] ^= ((uint_8t*)s)[ 5];
    359     ((uint_8t*)d)[ 6] ^= ((uint_8t*)s)[ 6];
    360     ((uint_8t*)d)[ 7] ^= ((uint_8t*)s)[ 7];
    361     ((uint_8t*)d)[ 8] ^= ((uint_8t*)s)[ 8];
    362     ((uint_8t*)d)[ 9] ^= ((uint_8t*)s)[ 9];
    363     ((uint_8t*)d)[10] ^= ((uint_8t*)s)[10];
    364     ((uint_8t*)d)[11] ^= ((uint_8t*)s)[11];
    365     ((uint_8t*)d)[12] ^= ((uint_8t*)s)[12];
    366     ((uint_8t*)d)[13] ^= ((uint_8t*)s)[13];
    367     ((uint_8t*)d)[14] ^= ((uint_8t*)s)[14];
    368     ((uint_8t*)d)[15] ^= ((uint_8t*)s)[15];
    369 #endif
    370 }
    371 
    372 static void copy_and_key( void *d, const void *s, const void *k )
    373 {
    374 #if defined( HAVE_UINT_32T )
    375     ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0] ^ ((uint_32t*)k)[ 0];
    376     ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1] ^ ((uint_32t*)k)[ 1];
    377     ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2] ^ ((uint_32t*)k)[ 2];
    378     ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3] ^ ((uint_32t*)k)[ 3];
    379 #elif 1
    380     ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0] ^ ((uint_8t*)k)[ 0];
    381     ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1] ^ ((uint_8t*)k)[ 1];
    382     ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2] ^ ((uint_8t*)k)[ 2];
    383     ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3] ^ ((uint_8t*)k)[ 3];
    384     ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4] ^ ((uint_8t*)k)[ 4];
    385     ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5] ^ ((uint_8t*)k)[ 5];
    386     ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6] ^ ((uint_8t*)k)[ 6];
    387     ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7] ^ ((uint_8t*)k)[ 7];
    388     ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8] ^ ((uint_8t*)k)[ 8];
    389     ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9] ^ ((uint_8t*)k)[ 9];
    390     ((uint_8t*)d)[10] = ((uint_8t*)s)[10] ^ ((uint_8t*)k)[10];
    391     ((uint_8t*)d)[11] = ((uint_8t*)s)[11] ^ ((uint_8t*)k)[11];
    392     ((uint_8t*)d)[12] = ((uint_8t*)s)[12] ^ ((uint_8t*)k)[12];
    393     ((uint_8t*)d)[13] = ((uint_8t*)s)[13] ^ ((uint_8t*)k)[13];
    394     ((uint_8t*)d)[14] = ((uint_8t*)s)[14] ^ ((uint_8t*)k)[14];
    395     ((uint_8t*)d)[15] = ((uint_8t*)s)[15] ^ ((uint_8t*)k)[15];
    396 #else
    397     block_copy(d, s);
    398     xor_block(d, k);
    399 #endif
    400 }
    401 
/* AddRoundKey: XOR the round key k into the state d in place. */
static void add_round_key( uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK] )
{
    xor_block(d, k);
}
    406 
    407 static void shift_sub_rows( uint_8t st[N_BLOCK] )
    408 {   uint_8t tt;
    409 
    410     st[ 0] = s_box(st[ 0]); st[ 4] = s_box(st[ 4]);
    411     st[ 8] = s_box(st[ 8]); st[12] = s_box(st[12]);
    412 
    413     tt = st[1]; st[ 1] = s_box(st[ 5]); st[ 5] = s_box(st[ 9]);
    414     st[ 9] = s_box(st[13]); st[13] = s_box( tt );
    415 
    416     tt = st[2]; st[ 2] = s_box(st[10]); st[10] = s_box( tt );
    417     tt = st[6]; st[ 6] = s_box(st[14]); st[14] = s_box( tt );
    418 
    419     tt = st[15]; st[15] = s_box(st[11]); st[11] = s_box(st[ 7]);
    420     st[ 7] = s_box(st[ 3]); st[ 3] = s_box( tt );
    421 }
    422 
    423 static void inv_shift_sub_rows( uint_8t st[N_BLOCK] )
    424 {   uint_8t tt;
    425 
    426     st[ 0] = is_box(st[ 0]); st[ 4] = is_box(st[ 4]);
    427     st[ 8] = is_box(st[ 8]); st[12] = is_box(st[12]);
    428 
    429     tt = st[13]; st[13] = is_box(st[9]); st[ 9] = is_box(st[5]);
    430     st[ 5] = is_box(st[1]); st[ 1] = is_box( tt );
    431 
    432     tt = st[2]; st[ 2] = is_box(st[10]); st[10] = is_box( tt );
    433     tt = st[6]; st[ 6] = is_box(st[14]); st[14] = is_box( tt );
    434 
    435     tt = st[3]; st[ 3] = is_box(st[ 7]); st[ 7] = is_box(st[11]);
    436     st[11] = is_box(st[15]); st[15] = is_box( tt );
    437 }
    438 
/* Combined SubBytes + ShiftRows + MixColumns for one forward round
   (Karl Malbrain's merged formulation): each output byte is the
   MixColumns combination (2,3,1,1) of the S-box of four state bytes
   picked along the shifted rows.  VERSION_1 transforms dt in place via
   a temporary copy; the alternative reads st and writes dt.            */
#if defined( VERSION_1 )
  static void mix_sub_columns( uint_8t dt[N_BLOCK] )
  { uint_8t st[N_BLOCK];
    block_copy(st, dt);
#else
  static void mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] )
  {
#endif
    /* output column 0 <- input bytes 0, 5, 10, 15 */
    dt[ 0] = gfm2_sb(st[0]) ^ gfm3_sb(st[5]) ^ s_box(st[10]) ^ s_box(st[15]);
    dt[ 1] = s_box(st[0]) ^ gfm2_sb(st[5]) ^ gfm3_sb(st[10]) ^ s_box(st[15]);
    dt[ 2] = s_box(st[0]) ^ s_box(st[5]) ^ gfm2_sb(st[10]) ^ gfm3_sb(st[15]);
    dt[ 3] = gfm3_sb(st[0]) ^ s_box(st[5]) ^ s_box(st[10]) ^ gfm2_sb(st[15]);

    /* output column 1 <- input bytes 4, 9, 14, 3 */
    dt[ 4] = gfm2_sb(st[4]) ^ gfm3_sb(st[9]) ^ s_box(st[14]) ^ s_box(st[3]);
    dt[ 5] = s_box(st[4]) ^ gfm2_sb(st[9]) ^ gfm3_sb(st[14]) ^ s_box(st[3]);
    dt[ 6] = s_box(st[4]) ^ s_box(st[9]) ^ gfm2_sb(st[14]) ^ gfm3_sb(st[3]);
    dt[ 7] = gfm3_sb(st[4]) ^ s_box(st[9]) ^ s_box(st[14]) ^ gfm2_sb(st[3]);

    /* output column 2 <- input bytes 8, 13, 2, 7 */
    dt[ 8] = gfm2_sb(st[8]) ^ gfm3_sb(st[13]) ^ s_box(st[2]) ^ s_box(st[7]);
    dt[ 9] = s_box(st[8]) ^ gfm2_sb(st[13]) ^ gfm3_sb(st[2]) ^ s_box(st[7]);
    dt[10] = s_box(st[8]) ^ s_box(st[13]) ^ gfm2_sb(st[2]) ^ gfm3_sb(st[7]);
    dt[11] = gfm3_sb(st[8]) ^ s_box(st[13]) ^ s_box(st[2]) ^ gfm2_sb(st[7]);

    /* output column 3 <- input bytes 12, 1, 6, 11 */
    dt[12] = gfm2_sb(st[12]) ^ gfm3_sb(st[1]) ^ s_box(st[6]) ^ s_box(st[11]);
    dt[13] = s_box(st[12]) ^ gfm2_sb(st[1]) ^ gfm3_sb(st[6]) ^ s_box(st[11]);
    dt[14] = s_box(st[12]) ^ s_box(st[1]) ^ gfm2_sb(st[6]) ^ gfm3_sb(st[11]);
    dt[15] = gfm3_sb(st[12]) ^ s_box(st[1]) ^ s_box(st[6]) ^ gfm2_sb(st[11]);
  }
    467 
/* Combined InvMixColumns + InvShiftRows + InvSubBytes for one inverse
   round: each input column is multiplied by the inverse MixColumns
   matrix (14,11,13,9), passed through the inverse S-box, and the
   results are scattered to the inverse-shifted row positions.
   VERSION_1 transforms dt in place via a temporary copy.               */
#if defined( VERSION_1 )
  static void inv_mix_sub_columns( uint_8t dt[N_BLOCK] )
  { uint_8t st[N_BLOCK];
    block_copy(st, dt);
#else
  static void inv_mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] )
  {
#endif
    /* input column 0 -> output bytes 0, 5, 10, 15 */
    dt[ 0] = is_box(gfm_e(st[ 0]) ^ gfm_b(st[ 1]) ^ gfm_d(st[ 2]) ^ gfm_9(st[ 3]));
    dt[ 5] = is_box(gfm_9(st[ 0]) ^ gfm_e(st[ 1]) ^ gfm_b(st[ 2]) ^ gfm_d(st[ 3]));
    dt[10] = is_box(gfm_d(st[ 0]) ^ gfm_9(st[ 1]) ^ gfm_e(st[ 2]) ^ gfm_b(st[ 3]));
    dt[15] = is_box(gfm_b(st[ 0]) ^ gfm_d(st[ 1]) ^ gfm_9(st[ 2]) ^ gfm_e(st[ 3]));

    /* input column 1 -> output bytes 4, 9, 14, 3 */
    dt[ 4] = is_box(gfm_e(st[ 4]) ^ gfm_b(st[ 5]) ^ gfm_d(st[ 6]) ^ gfm_9(st[ 7]));
    dt[ 9] = is_box(gfm_9(st[ 4]) ^ gfm_e(st[ 5]) ^ gfm_b(st[ 6]) ^ gfm_d(st[ 7]));
    dt[14] = is_box(gfm_d(st[ 4]) ^ gfm_9(st[ 5]) ^ gfm_e(st[ 6]) ^ gfm_b(st[ 7]));
    dt[ 3] = is_box(gfm_b(st[ 4]) ^ gfm_d(st[ 5]) ^ gfm_9(st[ 6]) ^ gfm_e(st[ 7]));

    /* input column 2 -> output bytes 8, 13, 2, 7 */
    dt[ 8] = is_box(gfm_e(st[ 8]) ^ gfm_b(st[ 9]) ^ gfm_d(st[10]) ^ gfm_9(st[11]));
    dt[13] = is_box(gfm_9(st[ 8]) ^ gfm_e(st[ 9]) ^ gfm_b(st[10]) ^ gfm_d(st[11]));
    dt[ 2] = is_box(gfm_d(st[ 8]) ^ gfm_9(st[ 9]) ^ gfm_e(st[10]) ^ gfm_b(st[11]));
    dt[ 7] = is_box(gfm_b(st[ 8]) ^ gfm_d(st[ 9]) ^ gfm_9(st[10]) ^ gfm_e(st[11]));

    /* input column 3 -> output bytes 12, 1, 6, 11 */
    dt[12] = is_box(gfm_e(st[12]) ^ gfm_b(st[13]) ^ gfm_d(st[14]) ^ gfm_9(st[15]));
    dt[ 1] = is_box(gfm_9(st[12]) ^ gfm_e(st[13]) ^ gfm_b(st[14]) ^ gfm_d(st[15]));
    dt[ 6] = is_box(gfm_d(st[12]) ^ gfm_9(st[13]) ^ gfm_e(st[14]) ^ gfm_b(st[15]));
    dt[11] = is_box(gfm_b(st[12]) ^ gfm_d(st[13]) ^ gfm_9(st[14]) ^ gfm_e(st[15]));
  }
    496 
    497 #if defined( AES_ENC_PREKEYED ) || defined( AES_DEC_PREKEYED )
    498 
    499 /*  Set the cipher key for the pre-keyed version */
    500 /*  NOTE: If the length_type used for the key length is an
    501     unsigned 8-bit character, a key length of 256 bits must
    502     be entered as a length in bytes (valid inputs are hence
    503     128, 192, 16, 24 and 32).
    504 */
    505 
/*  Expand 'key' into the round-key schedule ctx->ksch and set ctx->rnd
    to the round count (10/12/14 for 128/192/256-bit keys).  keylen may
    be given in bytes (16, 24, 32) or bits (128, 192; 256 is accepted
    only as 32 since it may not fit an 8-bit length_type).  Returns 0 on
    success, (return_type)-1 (with ctx->rnd = 0) on a bad length.       */
return_type aes_set_key( const unsigned char key[], length_type keylen, aes_context ctx[1] )
{
    uint_8t cc, rc, hi;

    /* normalise the key length to bytes */
    switch( keylen )
    {
    case 16:
    case 128:           /* length in bits (128 = 8*16) */
        keylen = 16;
        break;
    case 24:
    case 192:           /* length in bits (192 = 8*24) */
        keylen = 24;
        break;
    case 32:
/*    case 256:           length in bits (256 = 8*32) */
        keylen = 32;
        break;
    default:
        ctx->rnd = 0;   /* mark the context as unkeyed */
        return (return_type)-1;
    }
    block_copy_nn(ctx->ksch, key, keylen);  /* round 0 key is the cipher key */
    hi = (keylen + 28) << 2;                /* total schedule bytes: 16 * (rnd + 1) */
    ctx->rnd = (hi >> 4) - 1;               /* 10, 12 or 14 rounds */
    for( cc = keylen, rc = 1; cc < hi; cc += 4 )
    {   uint_8t tt, t0, t1, t2, t3;

        /* the previous schedule word */
        t0 = ctx->ksch[cc - 4];
        t1 = ctx->ksch[cc - 3];
        t2 = ctx->ksch[cc - 2];
        t3 = ctx->ksch[cc - 1];
        if( cc % keylen == 0 )              /* first word of each key-length group: */
        {                                   /* RotWord + SubWord + round constant   */
            tt = t0;
            t0 = s_box(t1) ^ rc;
            t1 = s_box(t2);
            t2 = s_box(t3);
            t3 = s_box(tt);
            rc = f2(rc);                    /* next round constant: rc *= 2 in GF(2^8) */
        }
        else if( keylen > 24 && cc % keylen == 16 )   /* extra SubWord for 256-bit keys */
        {
            t0 = s_box(t0);
            t1 = s_box(t1);
            t2 = s_box(t2);
            t3 = s_box(t3);
        }
        tt = cc - keylen;                   /* XOR with the word one key length back */
        ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0;
        ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1;
        ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2;
        ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3;
    }
    return 0;
}
    562 
    563 #endif
    564 
    565 #if defined( AES_ENC_PREKEYED )
    566 
    567 /*  Encrypt a single block of 16 bytes */
    568 
/*  Encrypt one 16-byte block with a pre-expanded key schedule.
    Returns 0 on success, (return_type)-1 if the context was never
    successfully keyed (ctx->rnd == 0).  in and out may alias.          */
return_type aes_encrypt( const unsigned char in[N_BLOCK], unsigned char  out[N_BLOCK], const aes_context ctx[1] )
{
    if( ctx->rnd )
    {
        uint_8t s1[N_BLOCK], r;
        copy_and_key( s1, in, ctx->ksch );      /* initial AddRoundKey */

        for( r = 1 ; r < ctx->rnd ; ++r )       /* rnd - 1 full rounds */
#if defined( VERSION_1 )
        {
            mix_sub_columns( s1 );              /* SubBytes+ShiftRows+MixColumns */
            add_round_key( s1, ctx->ksch + r * N_BLOCK);
        }
#else
        {   uint_8t s2[N_BLOCK];
            mix_sub_columns( s2, s1 );
            copy_and_key( s1, s2, ctx->ksch + r * N_BLOCK);
        }
#endif
        /* final round omits MixColumns; r == ctx->rnd after the loop */
        shift_sub_rows( s1 );
        copy_and_key( out, s1, ctx->ksch + r * N_BLOCK );
    }
    else
        return (return_type)-1;
    return 0;
}
    595 
    596 /* CBC encrypt a number of blocks (input and return an IV) */
    597 
    598 return_type aes_cbc_encrypt( const unsigned char *in, unsigned char *out,
    599                          int n_block, unsigned char iv[N_BLOCK], const aes_context ctx[1] )
    600 {
    601 
    602     while(n_block--)
    603     {
    604         xor_block(iv, in);
    605         if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
    606 			return EXIT_FAILURE;
    607         memcpy(out, iv, N_BLOCK);
    608         in += N_BLOCK;
    609         out += N_BLOCK;
    610     }
    611     return EXIT_SUCCESS;
    612 }
    613 
    614 #endif
    615 
    616 #if defined( AES_DEC_PREKEYED )
    617 
    618 /*  Decrypt a single block of 16 bytes */
    619 
/*  Decrypt one 16-byte block with a pre-expanded key schedule.
    Returns 0 on success, (return_type)-1 if the context was never
    successfully keyed (ctx->rnd == 0).  in and out may alias.          */
return_type aes_decrypt( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], const aes_context ctx[1] )
{
    if( ctx->rnd )
    {
        uint_8t s1[N_BLOCK], r;
        copy_and_key( s1, in, ctx->ksch + ctx->rnd * N_BLOCK );  /* undo final AddRoundKey */
        inv_shift_sub_rows( s1 );                                /* undo final ShiftRows+SubBytes */

        for( r = ctx->rnd ; --r ; )             /* rounds rnd-1 down to 1 */
#if defined( VERSION_1 )
        {
            add_round_key( s1, ctx->ksch + r * N_BLOCK );
            inv_mix_sub_columns( s1 );
        }
#else
        {   uint_8t s2[N_BLOCK];
            copy_and_key( s2, s1, ctx->ksch + r * N_BLOCK );
            inv_mix_sub_columns( s1, s2 );
        }
#endif
        copy_and_key( out, s1, ctx->ksch );     /* undo initial AddRoundKey */
    }
    else
        return (return_type)-1;
    return 0;
}
    646 
    647 /* CBC decrypt a number of blocks (input and return an IV) */
    648 
    649 return_type aes_cbc_decrypt( const unsigned char *in, unsigned char *out,
    650                          int n_block, unsigned char iv[N_BLOCK], const aes_context ctx[1] )
    651 {
    652     while(n_block--)
    653     {   uint_8t tmp[N_BLOCK];
    654 
    655         memcpy(tmp, in, N_BLOCK);
    656         if(aes_decrypt(in, out, ctx) != EXIT_SUCCESS)
    657 			return EXIT_FAILURE;
    658         xor_block(out, iv);
    659         memcpy(iv, tmp, N_BLOCK);
    660         in += N_BLOCK;
    661         out += N_BLOCK;
    662     }
    663     return EXIT_SUCCESS;
    664 }
    665 
    666 #endif
    667 
    668 #if defined( AES_ENC_128_OTFK )
    669 
    670 /*  The 'on the fly' encryption key update for for 128 bit keys */
    671 
    672 static void update_encrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc )
    673 {   uint_8t cc;
    674 
    675     k[0] ^= s_box(k[13]) ^ *rc;
    676     k[1] ^= s_box(k[14]);
    677     k[2] ^= s_box(k[15]);
    678     k[3] ^= s_box(k[12]);
    679     *rc = f2( *rc );
    680 
    681     for(cc = 4; cc < 16; cc += 4 )
    682     {
    683         k[cc + 0] ^= k[cc - 4];
    684         k[cc + 1] ^= k[cc - 3];
    685         k[cc + 2] ^= k[cc - 2];
    686         k[cc + 3] ^= k[cc - 1];
    687     }
    688 }
    689 
    690 /*  Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
    691 
/*  Encrypt one block with a 128-bit key, deriving each round key on the
    fly instead of using a precomputed schedule.  o_key receives the
    evolving key and ends holding the final round key; passing
    o_key == key updates the caller's key buffer in place.              */
void aes_encrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                     const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] )
{   uint_8t s1[N_BLOCK], r, rc = 1;

    if(o_key != key)
        block_copy( o_key, key );
    copy_and_key( s1, in, o_key );              /* initial AddRoundKey */

    for( r = 1 ; r < 10 ; ++r )                 /* 9 full rounds */
#if defined( VERSION_1 )
    {
        mix_sub_columns( s1 );
        update_encrypt_key_128( o_key, &rc );   /* derive the next round key */
        add_round_key( s1, o_key );
    }
#else
    {   uint_8t s2[N_BLOCK];
        mix_sub_columns( s2, s1 );
        update_encrypt_key_128( o_key, &rc );
        copy_and_key( s1, s2, o_key );
    }
#endif

    /* final round omits MixColumns */
    shift_sub_rows( s1 );
    update_encrypt_key_128( o_key, &rc );
    copy_and_key( out, s1, o_key );
}
    719 
    720 #endif
    721 
    722 #if defined( AES_DEC_128_OTFK )
    723 
/*  The 'on the fly' decryption key update for 128 bit keys */
    725 
    726 static void update_decrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc )
    727 {   uint_8t cc;
    728 
    729     for( cc = 12; cc > 0; cc -= 4 )
    730     {
    731         k[cc + 0] ^= k[cc - 4];
    732         k[cc + 1] ^= k[cc - 3];
    733         k[cc + 2] ^= k[cc - 2];
    734         k[cc + 3] ^= k[cc - 1];
    735     }
    736     *rc = d2(*rc);
    737     k[0] ^= s_box(k[13]) ^ *rc;
    738     k[1] ^= s_box(k[14]);
    739     k[2] ^= s_box(k[15]);
    740     k[3] ^= s_box(k[12]);
    741 }
    742 
    743 /*  Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
    744 
/*  Decrypt one 16-byte block with AES-128 using 'on the fly' keying.
    in    : ciphertext block
    out   : plaintext block (input is copied into local state s1
            first, so out may alias in)
    key   : must be the FINAL round key (the o_key left behind by
            aes_encrypt_128), since decryption walks the key schedule
            backwards.  rc starts at 0x6c, the round-constant value
            reached after the ten forward key updates.
    o_key : working key buffer; after the backward updates it ends as
            the original round-0 key.  May be the same buffer as key.
*/
void aes_decrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                      const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] )
{
    uint_8t s1[N_BLOCK], r, rc = 0x6c;
    if(o_key != key)
        block_copy( o_key, key );

    copy_and_key( s1, in, o_key );    /* undo the final AddRoundKey    */
    inv_shift_sub_rows( s1 );

    /* rounds 9..1: step the key schedule back one round, apply that
       round key, then the inverse MixColumns/SubBytes                 */
    for( r = 10 ; --r ; )
#if defined( VERSION_1 )
    {
        update_decrypt_key_128( o_key, &rc );
        add_round_key( s1, o_key );
        inv_mix_sub_columns( s1 );
    }
#else
    {   uint_8t s2[N_BLOCK];
        update_decrypt_key_128( o_key, &rc );
        copy_and_key( s2, s1, o_key );
        inv_mix_sub_columns( s1, s2 );
    }
#endif
    /* final step: add the round-0 key                                 */
    update_decrypt_key_128( o_key, &rc );
    copy_and_key( out, s1, o_key );
}
    772 
    773 #endif
    774 
    775 #if defined( AES_ENC_256_OTFK )
    776 
/*  The 'on the fly' encryption key update for 256 bit keys */
    778 
    779 static void update_encrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc )
    780 {   uint_8t cc;
    781 
    782     k[0] ^= s_box(k[29]) ^ *rc;
    783     k[1] ^= s_box(k[30]);
    784     k[2] ^= s_box(k[31]);
    785     k[3] ^= s_box(k[28]);
    786     *rc = f2( *rc );
    787 
    788     for(cc = 4; cc < 16; cc += 4)
    789     {
    790         k[cc + 0] ^= k[cc - 4];
    791         k[cc + 1] ^= k[cc - 3];
    792         k[cc + 2] ^= k[cc - 2];
    793         k[cc + 3] ^= k[cc - 1];
    794     }
    795 
    796     k[16] ^= s_box(k[12]);
    797     k[17] ^= s_box(k[13]);
    798     k[18] ^= s_box(k[14]);
    799     k[19] ^= s_box(k[15]);
    800 
    801     for( cc = 20; cc < 32; cc += 4 )
    802     {
    803         k[cc + 0] ^= k[cc - 4];
    804         k[cc + 1] ^= k[cc - 3];
    805         k[cc + 2] ^= k[cc - 2];
    806         k[cc + 3] ^= k[cc - 1];
    807     }
    808 }
    809 
    810 /*  Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */
    811 
/*  Encrypt one 16-byte block with AES-256 using 'on the fly' keying.
    key/o_key are 32 bytes: the lower 16 bytes supply the even round
    keys and the upper 16 bytes the odd ones, so the key schedule only
    needs stepping (update_encrypt_key_256) every second round.
    in    : plaintext block
    out   : ciphertext block (input is copied into local state s1
            first, so out may alias in)
    key   : 32-byte cipher key (unchanged)
    o_key : 32-byte working buffer; on return it holds the FINAL key
            state (the starting key for aes_decrypt_256), NOT the
            original key.  o_key may be the same buffer as key.
*/
void aes_encrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                      const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] )
{
    uint_8t s1[N_BLOCK], r, rc = 1;   /* rc: round constant, starts at 1 */
    if(o_key != key)
    {
        block_copy( o_key, key );
        block_copy( o_key + 16, key + 16 );
    }
    copy_and_key( s1, in, o_key );    /* round 0: AddRoundKey          */

    /* rounds 1..13: odd rounds use the upper key half as-is; even
       rounds first step the schedule, then use the lower half         */
    for( r = 1 ; r < 14 ; ++r )
#if defined( VERSION_1 )
    {
        mix_sub_columns(s1);
        if( r & 1 )
            add_round_key( s1, o_key + 16 );
        else
        {
            update_encrypt_key_256( o_key, &rc );
            add_round_key( s1, o_key );
        }
    }
#else
    {   uint_8t s2[N_BLOCK];
        mix_sub_columns( s2, s1 );
        if( r & 1 )
            copy_and_key( s1, s2, o_key + 16 );
        else
        {
            update_encrypt_key_256( o_key, &rc );
            copy_and_key( s1, s2, o_key );
        }
    }
#endif

    /* final round: no MixColumns                                      */
    shift_sub_rows( s1 );
    update_encrypt_key_256( o_key, &rc );
    copy_and_key( out, s1, o_key );
}
    852 
    853 #endif
    854 
    855 #if defined( AES_DEC_256_OTFK )
    856 
/*  The 'on the fly' decryption key update for 256 bit keys */
    858 
    859 static void update_decrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc )
    860 {   uint_8t cc;
    861 
    862     for(cc = 28; cc > 16; cc -= 4)
    863     {
    864         k[cc + 0] ^= k[cc - 4];
    865         k[cc + 1] ^= k[cc - 3];
    866         k[cc + 2] ^= k[cc - 2];
    867         k[cc + 3] ^= k[cc - 1];
    868     }
    869 
    870     k[16] ^= s_box(k[12]);
    871     k[17] ^= s_box(k[13]);
    872     k[18] ^= s_box(k[14]);
    873     k[19] ^= s_box(k[15]);
    874 
    875     for(cc = 12; cc > 0; cc -= 4)
    876     {
    877         k[cc + 0] ^= k[cc - 4];
    878         k[cc + 1] ^= k[cc - 3];
    879         k[cc + 2] ^= k[cc - 2];
    880         k[cc + 3] ^= k[cc - 1];
    881     }
    882 
    883     *rc = d2(*rc);
    884     k[0] ^= s_box(k[29]) ^ *rc;
    885     k[1] ^= s_box(k[30]);
    886     k[2] ^= s_box(k[31]);
    887     k[3] ^= s_box(k[28]);
    888 }
    889 
    890 /*  Decrypt a single block of 16 bytes with 'on the fly'
    891     256 bit keying
    892 */
/*  Decrypt one 16-byte block with AES-256 using 'on the fly' keying.
    in    : ciphertext block
    out   : plaintext block (input is copied into local state s1
            first, so out may alias in)
    key   : must be the FINAL 32-byte key state (the o_key left
            behind by aes_encrypt_256), since decryption walks the
            key schedule backwards.  rc starts at 0x80, the
            round-constant value reached by the forward updates.
    o_key : 32-byte working buffer; may be the same buffer as key.
*/
void aes_decrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                      const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] )
{
    uint_8t s1[N_BLOCK], r, rc = 0x80;

    if(o_key != key)
    {
        block_copy( o_key, key );
        block_copy( o_key + 16, key + 16 );
    }

    copy_and_key( s1, in, o_key );    /* undo the final AddRoundKey    */
    inv_shift_sub_rows( s1 );

    /* rounds 13..1: odd rounds step the schedule back and use the
       upper key half; even rounds use the lower half as-is            */
    for( r = 14 ; --r ; )
#if defined( VERSION_1 )
    {
        if( ( r & 1 ) )
        {
            update_decrypt_key_256( o_key, &rc );
            add_round_key( s1, o_key + 16 );
        }
        else
            add_round_key( s1, o_key );
        inv_mix_sub_columns( s1 );
    }
#else
    {   uint_8t s2[N_BLOCK];
        if( ( r & 1 ) )
        {
            update_decrypt_key_256( o_key, &rc );
            copy_and_key( s2, s1, o_key + 16 );
        }
        else
            copy_and_key( s2, s1, o_key );
        inv_mix_sub_columns( s1, s2 );
    }
#endif
    /* final step: the round-0 key is already in the lower half        */
    copy_and_key( out, s1, o_key );
}
    933 
    934 #endif
    935