      1 /*
      2  ---------------------------------------------------------------------------
      3  Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
      4 
      5  LICENSE TERMS
      6 
      7  The redistribution and use of this software (with or without changes)
      8  is allowed without the payment of fees or royalties provided that:
      9 
     10   1. source code distributions include the above copyright notice, this
     11      list of conditions and the following disclaimer;
     12 
     13   2. binary distributions include the above copyright notice, this list
     14      of conditions and the following disclaimer in their documentation;
     15 
     16   3. the name of the copyright holder is not used to endorse products
     17      built using this software without specific written permission.
     18 
     19  DISCLAIMER
     20 
     21  This software is provided 'as is' with no explicit or implied warranties
     22  in respect of its properties, including, but not limited to, correctness
     23  and/or fitness for purpose.
     24  ---------------------------------------------------------------------------
     25  Issue 09/09/2006
     26 
     27  This is an AES implementation that uses only 8-bit byte operations on the
     28  cipher state (there are options to use 32-bit types if available).
     29 
     30  The combination of mix columns and byte substitution used here is based on
     31  that developed by Karl Malbrain. His contribution is acknowledged.
     32  */
     33 
     34 /* define if you have a fast memcpy function on your system */
     35 #if 1
     36 #  define HAVE_MEMCPY
     37 #  include <string.h>
     38 #if 0
     39 #  if defined( _MSC_VER )
     40 #    include <intrin.h>
     41 #    pragma intrinsic( memcpy )
     42 #  endif
     43 #endif
     44 #endif
     45 
     46 #include <stdlib.h>
     47 
     48 /* define if you have fast 32-bit types on your system */
     49 #if 1
     50 #  define HAVE_UINT_32T
     51 #endif
     52 
/* define if you want the S box and finite field operations done by table lookup */
     54 #if 1
     55 #  define USE_TABLES
     56 #endif
     57 
/*  On an Intel Core 2 Duo, VERSION_1 is faster */
     59 
     60 /* alternative versions (test for performance on your system) */
     61 #if 1
     62 #  define VERSION_1
     63 #endif
     64 
     65 #include "aes.h"
     66 
#if defined( HAVE_UINT_32T )
  typedef unsigned int uint_32t;  /* must be exactly 32 bits wide: the 4-word block
                                     copies below assume it; 'unsigned long' is 64
                                     bits on LP64 targets                          */
#endif
     70 
     71 /* functions for finite field multiplication in the AES Galois field    */
     72 
     73 #define WPOLY   0x011b
     74 #define BPOLY     0x1b
     75 #define DPOLY   0x008d
     76 
     77 #define f1(x)   (x)
     78 #define f2(x)   ((x << 1) ^ (((x >> 7) & 1) * WPOLY))
     79 #define f4(x)   ((x << 2) ^ (((x >> 6) & 1) * WPOLY) ^ (((x >> 6) & 2) * WPOLY))
     80 #define f8(x)   ((x << 3) ^ (((x >> 5) & 1) * WPOLY) ^ (((x >> 5) & 2) * WPOLY) \
     81                           ^ (((x >> 5) & 4) * WPOLY))
     82 #define d2(x)   (((x) >> 1) ^ ((x) & 1 ? DPOLY : 0))
     83 
     84 #define f3(x)   (f2(x) ^ x)
     85 #define f9(x)   (f8(x) ^ x)
     86 #define fb(x)   (f8(x) ^ f2(x) ^ x)
     87 #define fd(x)   (f8(x) ^ f4(x) ^ x)
     88 #define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
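/* These macros multiply a field element by small constants in GF(2^8)
   modulo the AES polynomial x^8 + x^4 + x^3 + x + 1 (WPOLY = 0x11b):
   f2() is a left shift with conditional reduction, d2() is the inverse
   (division by x), and the other multipliers are built from f2/f4/f8
   by XOR.  Worked example: f2(0x57) = 0xae (high bit clear, no
   reduction), f2(0x80) = 0x100 ^ 0x11b = 0x01b, i.e. 0x1b as a byte,
   and f3(0x57) = f2(0x57) ^ 0x57 = 0xf9.                               */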
     89 
     90 #if defined( USE_TABLES )
     91 
     92 #define sb_data(w) {    /* S Box data values */                            \
     93     w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
     94     w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
     95     w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
     96     w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
     97     w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
     98     w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
     99     w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
    100     w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
    101     w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
    102     w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
    103     w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
    104     w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
    105     w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
    106     w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
    107     w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
    108     w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
    109     w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
    110     w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
    111     w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
    112     w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
    113     w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
    114     w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
    115     w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
    116     w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
    117     w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
    118     w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
    119     w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
    120     w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
    121     w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
    122     w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
    123     w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
    124     w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
    125 
    126 #define isb_data(w) {   /* inverse S Box data values */                    \
    127     w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\
    128     w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\
    129     w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\
    130     w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\
    131     w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\
    132     w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\
    133     w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\
    134     w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\
    135     w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\
    136     w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\
    137     w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\
    138     w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\
    139     w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\
    140     w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\
    141     w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\
    142     w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\
    143     w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\
    144     w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\
    145     w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\
    146     w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\
    147     w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\
    148     w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\
    149     w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\
    150     w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\
    151     w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\
    152     w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\
    153     w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\
    154     w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\
    155     w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\
    156     w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\
    157     w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\
    158     w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) }
    159 
    160 #define mm_data(w) {    /* basic data for forming finite field tables */   \
    161     w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\
    162     w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\
    163     w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\
    164     w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\
    165     w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\
    166     w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\
    167     w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\
    168     w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\
    169     w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\
    170     w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\
    171     w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\
    172     w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\
    173     w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\
    174     w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\
    175     w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\
    176     w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\
    177     w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\
    178     w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\
    179     w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\
    180     w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\
    181     w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\
    182     w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\
    183     w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\
    184     w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\
    185     w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\
    186     w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\
    187     w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\
    188     w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\
    189     w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\
    190     w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\
    191     w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\
    192     w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) }
    193 
    194 static const uint_8t sbox[256]  =  sb_data(f1);
    195 static const uint_8t isbox[256] = isb_data(f1);
    196 
    197 static const uint_8t gfm2_sbox[256] = sb_data(f2);
    198 static const uint_8t gfm3_sbox[256] = sb_data(f3);
    199 
    200 static const uint_8t gfmul_9[256] = mm_data(f9);
    201 static const uint_8t gfmul_b[256] = mm_data(fb);
    202 static const uint_8t gfmul_d[256] = mm_data(fd);
    203 static const uint_8t gfmul_e[256] = mm_data(fe);
    204 
    205 #define s_box(x)     sbox[(x)]
    206 #define is_box(x)    isbox[(x)]
    207 #define gfm2_sb(x)   gfm2_sbox[(x)]
    208 #define gfm3_sb(x)   gfm3_sbox[(x)]
    209 #define gfm_9(x)     gfmul_9[(x)]
    210 #define gfm_b(x)     gfmul_b[(x)]
    211 #define gfm_d(x)     gfmul_d[(x)]
    212 #define gfm_e(x)     gfmul_e[(x)]
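/* The tables above are filled at compile time by expanding the data
   macros with different field operations: sb_data(f1) is the plain
   S box, sb_data(f2) and sb_data(f3) are the S box entries already
   multiplied by 2 and 3 for the forward mix-columns step, and
   mm_data(f9/fb/fd/fe) are the multiply-by-0x09/0x0b/0x0d/0x0e
   tables used by the inverse.                                          */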
    213 
    214 #else
    215 
    216 /* this is the high bit of x right shifted by 1 */
    217 /* position. Since the starting polynomial has  */
    218 /* 9 bits (0x11b), this right shift keeps the   */
    219 /* values of all top bits within a byte         */
    220 
    221 static uint_8t hibit(const uint_8t x)
    222 {   uint_8t r = (uint_8t)((x >> 1) | (x >> 2));
    223 
    224     r |= (r >> 2);
    225     r |= (r >> 4);
    226     return (r + 1) >> 1;
    227 }
    228 
    229 /* return the inverse of the finite field element x */
    230 
    231 static uint_8t gf_inv(const uint_8t x)
    232 {   uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
    233 
    234     if(x < 2)
    235         return x;
    236 
    237     for( ; ; )
    238     {
    239         if(n1)
    240             while(n2 >= n1)             /* divide polynomial p2 by p1    */
    241             {
    242                 n2 /= n1;               /* shift smaller polynomial left */
    243                 p2 ^= (p1 * n2) & 0xff; /* and remove from larger one    */
    244                 v2 ^= (v1 * n2);        /* shift accumulated value and   */
    245                 n2 = hibit(p2);         /* add into result               */
    246             }
    247         else
    248             return v1;
    249 
    250         if(n2)                          /* repeat with values swapped    */
    251             while(n1 >= n2)
    252             {
    253                 n1 /= n2;
    254                 p1 ^= p2 * n1;
    255                 v1 ^= v2 * n1;
    256                 n1 = hibit(p1);
    257             }
    258         else
    259             return v2;
    260     }
    261 }
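/* gf_inv() is the extended Euclidean algorithm over GF(2)[x]: p1 and p2
   hold the polynomial remainders, v1 and v2 accumulate the matching
   multipliers, and n1/n2 track the leading bits so that n2 / n1 gives
   the power-of-two shift that aligns the leading terms at each
   division step.                                                        */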
    262 
    263 /* The forward and inverse affine transformations used in the S-box */
static uint_8t fwd_affine(const uint_8t x)
    265 {
    266 #if defined( HAVE_UINT_32T )
    267     uint_32t w = x;
    268     w ^= (w << 1) ^ (w << 2) ^ (w << 3) ^ (w << 4);
    269     return 0x63 ^ ((w ^ (w >> 8)) & 0xff);
    270 #else
    271     return 0x63 ^ x ^ (x << 1) ^ (x << 2) ^ (x << 3) ^ (x << 4)
    272                     ^ (x >> 7) ^ (x >> 6) ^ (x >> 5) ^ (x >> 4);
    273 #endif
    274 }
    275 
static uint_8t inv_affine(const uint_8t x)
    277 {
    278 #if defined( HAVE_UINT_32T )
    279     uint_32t w = x;
    280     w = (w << 1) ^ (w << 3) ^ (w << 6);
    281     return 0x05 ^ ((w ^ (w >> 8)) & 0xff);
    282 #else
    283     return 0x05 ^ (x << 1) ^ (x << 3) ^ (x << 6)
    284                 ^ (x >> 7) ^ (x >> 5) ^ (x >> 2);
    285 #endif
    286 }
    287 
    288 #define s_box(x)   fwd_affine(gf_inv(x))
    289 #define is_box(x)  gf_inv(inv_affine(x))
    290 #define gfm2_sb(x) f2(s_box(x))
    291 #define gfm3_sb(x) f3(s_box(x))
    292 #define gfm_9(x)   f9(x)
    293 #define gfm_b(x)   fb(x)
    294 #define gfm_d(x)   fd(x)
    295 #define gfm_e(x)   fe(x)
    296 
    297 #endif
    298 
    299 #if defined( HAVE_MEMCPY )
    300 #  define block_copy_nn(d, s, l)    memcpy(d, s, l)
    301 #  define block_copy(d, s)          memcpy(d, s, N_BLOCK)
    302 #else
    303 #  define block_copy_nn(d, s, l)    copy_block_nn(d, s, l)
    304 #  define block_copy(d, s)          copy_block(d, s)
    305 #endif
    306 
    307 #if !defined( HAVE_MEMCPY )
    308 static void copy_block( void *d, const void *s )
    309 {
    310 #if defined( HAVE_UINT_32T )
    311     ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0];
    312     ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1];
    313     ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2];
    314     ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3];
    315 #else
    316     ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0];
    317     ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1];
    318     ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2];
    319     ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3];
    320     ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4];
    321     ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5];
    322     ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6];
    323     ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7];
    324     ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8];
    325     ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9];
    326     ((uint_8t*)d)[10] = ((uint_8t*)s)[10];
    327     ((uint_8t*)d)[11] = ((uint_8t*)s)[11];
    328     ((uint_8t*)d)[12] = ((uint_8t*)s)[12];
    329     ((uint_8t*)d)[13] = ((uint_8t*)s)[13];
    330     ((uint_8t*)d)[14] = ((uint_8t*)s)[14];
    331     ((uint_8t*)d)[15] = ((uint_8t*)s)[15];
    332 #endif
    333 }
    334 
static void copy_block_nn( void *d, const void *s, uint_8t nn )
{
    uint_8t *dp = (uint_8t*)d;
    const uint_8t *sp = (const uint_8t*)s;

    while( nn-- )               /* copy nn bytes, one at a time */
        *dp++ = *sp++;
}
    340 #endif
    341 
    342 static void xor_block( void *d, const void *s )
    343 {
    344 #if defined( HAVE_UINT_32T )
    345     ((uint_32t*)d)[ 0] ^= ((uint_32t*)s)[ 0];
    346     ((uint_32t*)d)[ 1] ^= ((uint_32t*)s)[ 1];
    347     ((uint_32t*)d)[ 2] ^= ((uint_32t*)s)[ 2];
    348     ((uint_32t*)d)[ 3] ^= ((uint_32t*)s)[ 3];
    349 #else
    350     ((uint_8t*)d)[ 0] ^= ((uint_8t*)s)[ 0];
    351     ((uint_8t*)d)[ 1] ^= ((uint_8t*)s)[ 1];
    352     ((uint_8t*)d)[ 2] ^= ((uint_8t*)s)[ 2];
    353     ((uint_8t*)d)[ 3] ^= ((uint_8t*)s)[ 3];
    354     ((uint_8t*)d)[ 4] ^= ((uint_8t*)s)[ 4];
    355     ((uint_8t*)d)[ 5] ^= ((uint_8t*)s)[ 5];
    356     ((uint_8t*)d)[ 6] ^= ((uint_8t*)s)[ 6];
    357     ((uint_8t*)d)[ 7] ^= ((uint_8t*)s)[ 7];
    358     ((uint_8t*)d)[ 8] ^= ((uint_8t*)s)[ 8];
    359     ((uint_8t*)d)[ 9] ^= ((uint_8t*)s)[ 9];
    360     ((uint_8t*)d)[10] ^= ((uint_8t*)s)[10];
    361     ((uint_8t*)d)[11] ^= ((uint_8t*)s)[11];
    362     ((uint_8t*)d)[12] ^= ((uint_8t*)s)[12];
    363     ((uint_8t*)d)[13] ^= ((uint_8t*)s)[13];
    364     ((uint_8t*)d)[14] ^= ((uint_8t*)s)[14];
    365     ((uint_8t*)d)[15] ^= ((uint_8t*)s)[15];
    366 #endif
    367 }
    368 
    369 static void copy_and_key( void *d, const void *s, const void *k )
    370 {
    371 #if defined( HAVE_UINT_32T )
    372     ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0] ^ ((uint_32t*)k)[ 0];
    373     ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1] ^ ((uint_32t*)k)[ 1];
    374     ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2] ^ ((uint_32t*)k)[ 2];
    375     ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3] ^ ((uint_32t*)k)[ 3];
    376 #elif 1
    377     ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0] ^ ((uint_8t*)k)[ 0];
    378     ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1] ^ ((uint_8t*)k)[ 1];
    379     ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2] ^ ((uint_8t*)k)[ 2];
    380     ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3] ^ ((uint_8t*)k)[ 3];
    381     ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4] ^ ((uint_8t*)k)[ 4];
    382     ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5] ^ ((uint_8t*)k)[ 5];
    383     ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6] ^ ((uint_8t*)k)[ 6];
    384     ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7] ^ ((uint_8t*)k)[ 7];
    385     ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8] ^ ((uint_8t*)k)[ 8];
    386     ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9] ^ ((uint_8t*)k)[ 9];
    387     ((uint_8t*)d)[10] = ((uint_8t*)s)[10] ^ ((uint_8t*)k)[10];
    388     ((uint_8t*)d)[11] = ((uint_8t*)s)[11] ^ ((uint_8t*)k)[11];
    389     ((uint_8t*)d)[12] = ((uint_8t*)s)[12] ^ ((uint_8t*)k)[12];
    390     ((uint_8t*)d)[13] = ((uint_8t*)s)[13] ^ ((uint_8t*)k)[13];
    391     ((uint_8t*)d)[14] = ((uint_8t*)s)[14] ^ ((uint_8t*)k)[14];
    392     ((uint_8t*)d)[15] = ((uint_8t*)s)[15] ^ ((uint_8t*)k)[15];
    393 #else
    394     block_copy(d, s);
    395     xor_block(d, k);
    396 #endif
    397 }
    398 
    399 static void add_round_key( uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK] )
    400 {
    401     xor_block(d, k);
    402 }
    403 
    404 static void shift_sub_rows( uint_8t st[N_BLOCK] )
    405 {   uint_8t tt;
    406 
    407     st[ 0] = s_box(st[ 0]); st[ 4] = s_box(st[ 4]);
    408     st[ 8] = s_box(st[ 8]); st[12] = s_box(st[12]);
    409 
    410     tt = st[1]; st[ 1] = s_box(st[ 5]); st[ 5] = s_box(st[ 9]);
    411     st[ 9] = s_box(st[13]); st[13] = s_box( tt );
    412 
    413     tt = st[2]; st[ 2] = s_box(st[10]); st[10] = s_box( tt );
    414     tt = st[6]; st[ 6] = s_box(st[14]); st[14] = s_box( tt );
    415 
    416     tt = st[15]; st[15] = s_box(st[11]); st[11] = s_box(st[ 7]);
    417     st[ 7] = s_box(st[ 3]); st[ 3] = s_box( tt );
    418 }
    419 
    420 static void inv_shift_sub_rows( uint_8t st[N_BLOCK] )
    421 {   uint_8t tt;
    422 
    423     st[ 0] = is_box(st[ 0]); st[ 4] = is_box(st[ 4]);
    424     st[ 8] = is_box(st[ 8]); st[12] = is_box(st[12]);
    425 
    426     tt = st[13]; st[13] = is_box(st[9]); st[ 9] = is_box(st[5]);
    427     st[ 5] = is_box(st[1]); st[ 1] = is_box( tt );
    428 
    429     tt = st[2]; st[ 2] = is_box(st[10]); st[10] = is_box( tt );
    430     tt = st[6]; st[ 6] = is_box(st[14]); st[14] = is_box( tt );
    431 
    432     tt = st[3]; st[ 3] = is_box(st[ 7]); st[ 7] = is_box(st[11]);
    433     st[11] = is_box(st[15]); st[15] = is_box( tt );
    434 }
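/* The state is held column by column: st[0..3] is column 0, st[4..7] is
   column 1, and so on, so row r of the AES state is st[r], st[r+4],
   st[r+8], st[r+12].  The two routines above apply SubBytes/InvSubBytes
   and (Inv)ShiftRows in a single pass: row 0 is substituted in place,
   row 1 rotates by one column, row 2 by two and row 3 by three.         */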
    435 
    436 #if defined( VERSION_1 )
    437   static void mix_sub_columns( uint_8t dt[N_BLOCK] )
    438   { uint_8t st[N_BLOCK];
    439     block_copy(st, dt);
    440 #else
    441   static void mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] )
    442   {
    443 #endif
    444     dt[ 0] = gfm2_sb(st[0]) ^ gfm3_sb(st[5]) ^ s_box(st[10]) ^ s_box(st[15]);
    445     dt[ 1] = s_box(st[0]) ^ gfm2_sb(st[5]) ^ gfm3_sb(st[10]) ^ s_box(st[15]);
    446     dt[ 2] = s_box(st[0]) ^ s_box(st[5]) ^ gfm2_sb(st[10]) ^ gfm3_sb(st[15]);
    447     dt[ 3] = gfm3_sb(st[0]) ^ s_box(st[5]) ^ s_box(st[10]) ^ gfm2_sb(st[15]);
    448 
    449     dt[ 4] = gfm2_sb(st[4]) ^ gfm3_sb(st[9]) ^ s_box(st[14]) ^ s_box(st[3]);
    450     dt[ 5] = s_box(st[4]) ^ gfm2_sb(st[9]) ^ gfm3_sb(st[14]) ^ s_box(st[3]);
    451     dt[ 6] = s_box(st[4]) ^ s_box(st[9]) ^ gfm2_sb(st[14]) ^ gfm3_sb(st[3]);
    452     dt[ 7] = gfm3_sb(st[4]) ^ s_box(st[9]) ^ s_box(st[14]) ^ gfm2_sb(st[3]);
    453 
    454     dt[ 8] = gfm2_sb(st[8]) ^ gfm3_sb(st[13]) ^ s_box(st[2]) ^ s_box(st[7]);
    455     dt[ 9] = s_box(st[8]) ^ gfm2_sb(st[13]) ^ gfm3_sb(st[2]) ^ s_box(st[7]);
    456     dt[10] = s_box(st[8]) ^ s_box(st[13]) ^ gfm2_sb(st[2]) ^ gfm3_sb(st[7]);
    457     dt[11] = gfm3_sb(st[8]) ^ s_box(st[13]) ^ s_box(st[2]) ^ gfm2_sb(st[7]);
    458 
    459     dt[12] = gfm2_sb(st[12]) ^ gfm3_sb(st[1]) ^ s_box(st[6]) ^ s_box(st[11]);
    460     dt[13] = s_box(st[12]) ^ gfm2_sb(st[1]) ^ gfm3_sb(st[6]) ^ s_box(st[11]);
    461     dt[14] = s_box(st[12]) ^ s_box(st[1]) ^ gfm2_sb(st[6]) ^ gfm3_sb(st[11]);
    462     dt[15] = gfm3_sb(st[12]) ^ s_box(st[1]) ^ s_box(st[6]) ^ gfm2_sb(st[11]);
    463   }
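/* mix_sub_columns() fuses SubBytes, ShiftRows and MixColumns: the input
   indices st[0], st[5], st[10], st[15] are the bytes that ShiftRows
   moves into output column 0, and each output column is the usual
   MixColumns product with the circulant matrix (02 03 01 01).           */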
    464 
    465 #if defined( VERSION_1 )
    466   static void inv_mix_sub_columns( uint_8t dt[N_BLOCK] )
    467   { uint_8t st[N_BLOCK];
    468     block_copy(st, dt);
    469 #else
    470   static void inv_mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] )
    471   {
    472 #endif
    473     dt[ 0] = is_box(gfm_e(st[ 0]) ^ gfm_b(st[ 1]) ^ gfm_d(st[ 2]) ^ gfm_9(st[ 3]));
    474     dt[ 5] = is_box(gfm_9(st[ 0]) ^ gfm_e(st[ 1]) ^ gfm_b(st[ 2]) ^ gfm_d(st[ 3]));
    475     dt[10] = is_box(gfm_d(st[ 0]) ^ gfm_9(st[ 1]) ^ gfm_e(st[ 2]) ^ gfm_b(st[ 3]));
    476     dt[15] = is_box(gfm_b(st[ 0]) ^ gfm_d(st[ 1]) ^ gfm_9(st[ 2]) ^ gfm_e(st[ 3]));
    477 
    478     dt[ 4] = is_box(gfm_e(st[ 4]) ^ gfm_b(st[ 5]) ^ gfm_d(st[ 6]) ^ gfm_9(st[ 7]));
    479     dt[ 9] = is_box(gfm_9(st[ 4]) ^ gfm_e(st[ 5]) ^ gfm_b(st[ 6]) ^ gfm_d(st[ 7]));
    480     dt[14] = is_box(gfm_d(st[ 4]) ^ gfm_9(st[ 5]) ^ gfm_e(st[ 6]) ^ gfm_b(st[ 7]));
    481     dt[ 3] = is_box(gfm_b(st[ 4]) ^ gfm_d(st[ 5]) ^ gfm_9(st[ 6]) ^ gfm_e(st[ 7]));
    482 
    483     dt[ 8] = is_box(gfm_e(st[ 8]) ^ gfm_b(st[ 9]) ^ gfm_d(st[10]) ^ gfm_9(st[11]));
    484     dt[13] = is_box(gfm_9(st[ 8]) ^ gfm_e(st[ 9]) ^ gfm_b(st[10]) ^ gfm_d(st[11]));
    485     dt[ 2] = is_box(gfm_d(st[ 8]) ^ gfm_9(st[ 9]) ^ gfm_e(st[10]) ^ gfm_b(st[11]));
    486     dt[ 7] = is_box(gfm_b(st[ 8]) ^ gfm_d(st[ 9]) ^ gfm_9(st[10]) ^ gfm_e(st[11]));
    487 
    488     dt[12] = is_box(gfm_e(st[12]) ^ gfm_b(st[13]) ^ gfm_d(st[14]) ^ gfm_9(st[15]));
    489     dt[ 1] = is_box(gfm_9(st[12]) ^ gfm_e(st[13]) ^ gfm_b(st[14]) ^ gfm_d(st[15]));
    490     dt[ 6] = is_box(gfm_d(st[12]) ^ gfm_9(st[13]) ^ gfm_e(st[14]) ^ gfm_b(st[15]));
    491     dt[11] = is_box(gfm_b(st[12]) ^ gfm_d(st[13]) ^ gfm_9(st[14]) ^ gfm_e(st[15]));
    492   }
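/* inv_mix_sub_columns() is the reverse: each input column is multiplied
   by the inverse MixColumns matrix (0e 0b 0d 09), InvSubBytes is then
   applied, and the results are written to the InvShiftRows positions
   (dt[0], dt[5], dt[10], dt[15] for input column 0).                    */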
    493 
    494 #if defined( AES_ENC_PREKEYED ) || defined( AES_DEC_PREKEYED )
    495 
    496 /*  Set the cipher key for the pre-keyed version */
    497 
    498 return_type aes_set_key( const unsigned char key[], length_type keylen, aes_context ctx[1] )
    499 {
    500     uint_8t cc, rc, hi;
    501 
    502     switch( keylen )
    503     {
    504     case 16:
    505     case 128:
    506         keylen = 16;
    507         break;
    508     case 24:
    509     case 192:
    510         keylen = 24;
    511         break;
    512     case 32:
    513     /*    case 256:           length in bits (256 = 8*32) */
    514         keylen = 32;
    515         break;
    516     default:
    517         ctx->rnd = 0;
    518         return (return_type)-1;
    519     }
    520     block_copy_nn(ctx->ksch, key, keylen);
    521     hi = (keylen + 28) << 2;
    522     ctx->rnd = (hi >> 4) - 1;
    523     for( cc = keylen, rc = 1; cc < hi; cc += 4 )
    524     {   uint_8t tt, t0, t1, t2, t3;
    525 
    526         t0 = ctx->ksch[cc - 4];
    527         t1 = ctx->ksch[cc - 3];
    528         t2 = ctx->ksch[cc - 2];
    529         t3 = ctx->ksch[cc - 1];
    530         if( cc % keylen == 0 )
    531         {
    532             tt = t0;
    533             t0 = s_box(t1) ^ rc;
    534             t1 = s_box(t2);
    535             t2 = s_box(t3);
    536             t3 = s_box(tt);
    537             rc = f2(rc);
    538         }
    539         else if( keylen > 24 && cc % keylen == 16 )
    540         {
    541             t0 = s_box(t0);
    542             t1 = s_box(t1);
    543             t2 = s_box(t2);
    544             t3 = s_box(t3);
    545         }
    546         tt = cc - keylen;
    547         ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0;
    548         ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1;
    549         ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2;
    550         ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3;
    551     }
    552     return 0;
    553 }
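/* For key lengths of 16, 24 and 32 bytes, hi works out to 176, 208 and
   240, so ctx->ksch holds 11, 13 and 15 round keys of 16 bytes and
   ctx->rnd is set to 10, 12 and 14 rounds respectively.                 */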
    554 
    555 #endif
    556 
    557 #if defined( AES_ENC_PREKEYED )
    558 
    559 /*  Encrypt a single block of 16 bytes */
    560 
    561 return_type aes_encrypt( const unsigned char in[N_BLOCK], unsigned char  out[N_BLOCK], const aes_context ctx[1] )
    562 {
    563     if( ctx->rnd )
    564     {
    565         uint_8t s1[N_BLOCK], r;
    566         copy_and_key( s1, in, ctx->ksch );
    567 
    568         for( r = 1 ; r < ctx->rnd ; ++r )
    569 #if defined( VERSION_1 )
    570         {
    571             mix_sub_columns( s1 );
    572             add_round_key( s1, ctx->ksch + r * N_BLOCK);
    573         }
    574 #else
    575         {   uint_8t s2[N_BLOCK];
    576             mix_sub_columns( s2, s1 );
    577             copy_and_key( s1, s2, ctx->ksch + r * N_BLOCK);
    578         }
    579 #endif
    580         shift_sub_rows( s1 );
    581         copy_and_key( out, s1, ctx->ksch + r * N_BLOCK );
    582     }
    583     else
    584         return (return_type)-1;
    585     return 0;
    586 }
    587 
/* CBC encrypt a number of blocks (the IV is an input and is updated on return) */
    589 
    590 return_type aes_cbc_encrypt( const unsigned char *in, unsigned char *out,
    591                          int n_block, unsigned char iv[N_BLOCK], const aes_context ctx[1] )
    592 {
    593 
    594     while(n_block--)
    595     {
    596         xor_block(iv, in);
    597         if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
            return EXIT_FAILURE;
    599         memcpy(out, iv, N_BLOCK);
    600         in += N_BLOCK;
    601         out += N_BLOCK;
    602     }
    603     return EXIT_SUCCESS;
    604 }
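/*  A minimal, compiled-out usage sketch of the pre-keyed API.  It is not
    part of the library: the function name, the all-zero key, IV and
    plaintext below are placeholders chosen only for illustration.       */
#if 0
static int example_cbc_encrypt( void )
{
    static const unsigned char key[16] = { 0 };      /* example key          */
    unsigned char iv[N_BLOCK] = { 0 };               /* example IV           */
    unsigned char pt[2 * N_BLOCK] = { 0 };           /* two plaintext blocks */
    unsigned char ct[2 * N_BLOCK];                   /* ciphertext output    */
    aes_context ctx[1];

    if( aes_set_key( key, 16, ctx ) != 0 )           /* key length in bytes  */
        return -1;
    /* iv is updated in place to the last ciphertext block */
    return aes_cbc_encrypt( pt, ct, 2, iv, ctx ) == EXIT_SUCCESS ? 0 : -1;
}
#endif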
    605 
    606 #endif
    607 
    608 #if defined( AES_DEC_PREKEYED )
    609 
    610 /*  Decrypt a single block of 16 bytes */
    611 
    612 return_type aes_decrypt( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], const aes_context ctx[1] )
    613 {
    614     if( ctx->rnd )
    615     {
    616         uint_8t s1[N_BLOCK], r;
    617         copy_and_key( s1, in, ctx->ksch + ctx->rnd * N_BLOCK );
    618         inv_shift_sub_rows( s1 );
    619 
    620         for( r = ctx->rnd ; --r ; )
    621 #if defined( VERSION_1 )
    622         {
    623             add_round_key( s1, ctx->ksch + r * N_BLOCK );
    624             inv_mix_sub_columns( s1 );
    625         }
    626 #else
    627         {   uint_8t s2[N_BLOCK];
    628             copy_and_key( s2, s1, ctx->ksch + r * N_BLOCK );
    629             inv_mix_sub_columns( s1, s2 );
    630         }
    631 #endif
    632         copy_and_key( out, s1, ctx->ksch );
    633     }
    634     else
    635         return (return_type)-1;
    636     return 0;
    637 }
    638 
/* CBC decrypt a number of blocks (the IV is an input and is updated on return) */
    640 
    641 return_type aes_cbc_decrypt( const unsigned char *in, unsigned char *out,
    642                          int n_block, unsigned char iv[N_BLOCK], const aes_context ctx[1] )
    643 {
    644     while(n_block--)
    645     {   uint_8t tmp[N_BLOCK];
    646 
    647         memcpy(tmp, in, N_BLOCK);
    648         if(aes_decrypt(in, out, ctx) != EXIT_SUCCESS)
            return EXIT_FAILURE;
    650         xor_block(out, iv);
    651         memcpy(iv, tmp, N_BLOCK);
    652         in += N_BLOCK;
    653         out += N_BLOCK;
    654     }
    655     return EXIT_SUCCESS;
    656 }
    657 
    658 #endif
    659 
    660 #if defined( AES_ENC_128_OTFK )
    661 
/*  The 'on the fly' encryption key update for 128 bit keys */
    663 
    664 static void update_encrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc )
    665 {   uint_8t cc;
    666 
    667     k[0] ^= s_box(k[13]) ^ *rc;
    668     k[1] ^= s_box(k[14]);
    669     k[2] ^= s_box(k[15]);
    670     k[3] ^= s_box(k[12]);
    671     *rc = f2( *rc );
    672 
    673     for(cc = 4; cc < 16; cc += 4 )
    674     {
    675         k[cc + 0] ^= k[cc - 4];
    676         k[cc + 1] ^= k[cc - 3];
    677         k[cc + 2] ^= k[cc - 2];
    678         k[cc + 3] ^= k[cc - 1];
    679     }
    680 }
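/* Each call to update_encrypt_key_128() advances k[] in place from round
   key r to round key r + 1: the first word is mixed with the rotated,
   substituted last word and the round constant *rc, the remaining words
   are chained by XOR, and *rc is doubled in the field for the next call. */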
    681 
    682 /*  Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
    683 
    684 void aes_encrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
    685                      const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] )
    686 {   uint_8t s1[N_BLOCK], r, rc = 1;
    687 
    688     if(o_key != key)
    689         block_copy( o_key, key );
    690     copy_and_key( s1, in, o_key );
    691 
    692     for( r = 1 ; r < 10 ; ++r )
    693 #if defined( VERSION_1 )
    694     {
    695         mix_sub_columns( s1 );
    696         update_encrypt_key_128( o_key, &rc );
    697         add_round_key( s1, o_key );
    698     }
    699 #else
    700     {   uint_8t s2[N_BLOCK];
    701         mix_sub_columns( s2, s1 );
    702         update_encrypt_key_128( o_key, &rc );
    703         copy_and_key( s1, s2, o_key );
    704     }
    705 #endif
    706 
    707     shift_sub_rows( s1 );
    708     update_encrypt_key_128( o_key, &rc );
    709     copy_and_key( out, s1, o_key );
    710 }
    711 
    712 #endif
    713 
    714 #if defined( AES_DEC_128_OTFK )
    715 
/*  The 'on the fly' decryption key update for 128 bit keys */
    717 
    718 static void update_decrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc )
    719 {   uint_8t cc;
    720 
    721     for( cc = 12; cc > 0; cc -= 4 )
    722     {
    723         k[cc + 0] ^= k[cc - 4];
    724         k[cc + 1] ^= k[cc - 3];
    725         k[cc + 2] ^= k[cc - 2];
    726         k[cc + 3] ^= k[cc - 1];
    727     }
    728     *rc = d2(*rc);
    729     k[0] ^= s_box(k[13]) ^ *rc;
    730     k[1] ^= s_box(k[14]);
    731     k[2] ^= s_box(k[15]);
    732     k[3] ^= s_box(k[12]);
    733 }
    734 
    735 /*  Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
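/*  For on-the-fly decryption the 'key' argument is expected to be the
    final round key produced by aes_encrypt_128 (its o_key output), since
    the schedule is walked backwards.  rc starts at 0x6c = f2(0x36) so
    that the first d2() in update_decrypt_key_128 yields 0x36, the last
    forward round constant; after the call o_key holds the original
    cipher key again.                                                      */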
    736 
    737 void aes_decrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
    738                       const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] )
    739 {
    740     uint_8t s1[N_BLOCK], r, rc = 0x6c;
    741     if(o_key != key)
    742         block_copy( o_key, key );
    743 
    744     copy_and_key( s1, in, o_key );
    745     inv_shift_sub_rows( s1 );
    746 
    747     for( r = 10 ; --r ; )
    748 #if defined( VERSION_1 )
    749     {
    750         update_decrypt_key_128( o_key, &rc );
    751         add_round_key( s1, o_key );
    752         inv_mix_sub_columns( s1 );
    753     }
    754 #else
    755     {   uint_8t s2[N_BLOCK];
    756         update_decrypt_key_128( o_key, &rc );
    757         copy_and_key( s2, s1, o_key );
    758         inv_mix_sub_columns( s1, s2 );
    759     }
    760 #endif
    761     update_decrypt_key_128( o_key, &rc );
    762     copy_and_key( out, s1, o_key );
    763 }
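/*  A compiled-out sketch of the 128-bit on-the-fly keying API.  It is not
    part of the library and assumes AES_ENC_128_OTFK is also defined; the
    function name and the zero-filled buffers are placeholders.            */
#if 0
static void example_otfk_128( void )
{
    static const unsigned char key[N_BLOCK] = { 0 };  /* example cipher key  */
    unsigned char pt[N_BLOCK] = { 0 };                /* example plaintext   */
    unsigned char ct[N_BLOCK], rt[N_BLOCK];
    unsigned char ekey[N_BLOCK], dkey[N_BLOCK];

    aes_encrypt_128( pt, ct, key, ekey );   /* ekey ends as the last round key   */
    aes_decrypt_128( ct, rt, ekey, dkey );  /* rt should equal pt; dkey ends as
                                               the original cipher key           */
}
#endif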
    764 
    765 #endif
    766 
    767 #if defined( AES_ENC_256_OTFK )
    768 
/*  The 'on the fly' encryption key update for 256 bit keys */
    770 
    771 static void update_encrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc )
    772 {   uint_8t cc;
    773 
    774     k[0] ^= s_box(k[29]) ^ *rc;
    775     k[1] ^= s_box(k[30]);
    776     k[2] ^= s_box(k[31]);
    777     k[3] ^= s_box(k[28]);
    778     *rc = f2( *rc );
    779 
    780     for(cc = 4; cc < 16; cc += 4)
    781     {
    782         k[cc + 0] ^= k[cc - 4];
    783         k[cc + 1] ^= k[cc - 3];
    784         k[cc + 2] ^= k[cc - 2];
    785         k[cc + 3] ^= k[cc - 1];
    786     }
    787 
    788     k[16] ^= s_box(k[12]);
    789     k[17] ^= s_box(k[13]);
    790     k[18] ^= s_box(k[14]);
    791     k[19] ^= s_box(k[15]);
    792 
    793     for( cc = 20; cc < 32; cc += 4 )
    794     {
    795         k[cc + 0] ^= k[cc - 4];
    796         k[cc + 1] ^= k[cc - 3];
    797         k[cc + 2] ^= k[cc - 2];
    798         k[cc + 3] ^= k[cc - 1];
    799     }
    800 }
    801 
    802 /*  Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */
    803 
    804 void aes_encrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
    805                       const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] )
    806 {
    807     uint_8t s1[N_BLOCK], r, rc = 1;
    808     if(o_key != key)
    809     {
    810         block_copy( o_key, key );
    811         block_copy( o_key + 16, key + 16 );
    812     }
    813     copy_and_key( s1, in, o_key );
    814 
    815     for( r = 1 ; r < 14 ; ++r )
    816 #if defined( VERSION_1 )
    817     {
    818         mix_sub_columns(s1);
    819         if( r & 1 )
    820             add_round_key( s1, o_key + 16 );
    821         else
    822         {
    823             update_encrypt_key_256( o_key, &rc );
    824             add_round_key( s1, o_key );
    825         }
    826     }
    827 #else
    828     {   uint_8t s2[N_BLOCK];
    829         mix_sub_columns( s2, s1 );
    830         if( r & 1 )
    831             copy_and_key( s1, s2, o_key + 16 );
    832         else
    833         {
    834             update_encrypt_key_256( o_key, &rc );
    835             copy_and_key( s1, s2, o_key );
    836         }
    837     }
    838 #endif
    839 
    840     shift_sub_rows( s1 );
    841     update_encrypt_key_256( o_key, &rc );
    842     copy_and_key( out, s1, o_key );
    843 }
    844 
    845 #endif
    846 
    847 #if defined( AES_DEC_256_OTFK )
    848 
/*  The 'on the fly' decryption key update for 256 bit keys */
    850 
    851 static void update_decrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc )
    852 {   uint_8t cc;
    853 
    854     for(cc = 28; cc > 16; cc -= 4)
    855     {
    856         k[cc + 0] ^= k[cc - 4];
    857         k[cc + 1] ^= k[cc - 3];
    858         k[cc + 2] ^= k[cc - 2];
    859         k[cc + 3] ^= k[cc - 1];
    860     }
    861 
    862     k[16] ^= s_box(k[12]);
    863     k[17] ^= s_box(k[13]);
    864     k[18] ^= s_box(k[14]);
    865     k[19] ^= s_box(k[15]);
    866 
    867     for(cc = 12; cc > 0; cc -= 4)
    868     {
    869         k[cc + 0] ^= k[cc - 4];
    870         k[cc + 1] ^= k[cc - 3];
    871         k[cc + 2] ^= k[cc - 2];
    872         k[cc + 3] ^= k[cc - 1];
    873     }
    874 
    875     *rc = d2(*rc);
    876     k[0] ^= s_box(k[29]) ^ *rc;
    877     k[1] ^= s_box(k[30]);
    878     k[2] ^= s_box(k[31]);
    879     k[3] ^= s_box(k[28]);
    880 }
    881 
/*  Decrypt a single block of 16 bytes with 'on the fly' 256 bit keying */

    885 void aes_decrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
    886                       const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] )
    887 {
    888     uint_8t s1[N_BLOCK], r, rc = 0x80;
    889 
    890     if(o_key != key)
    891     {
    892         block_copy( o_key, key );
    893         block_copy( o_key + 16, key + 16 );
    894     }
    895 
    896     copy_and_key( s1, in, o_key );
    897     inv_shift_sub_rows( s1 );
    898 
    899     for( r = 14 ; --r ; )
    900 #if defined( VERSION_1 )
    901     {
    902         if( ( r & 1 ) )
    903         {
    904             update_decrypt_key_256( o_key, &rc );
    905             add_round_key( s1, o_key + 16 );
    906         }
    907         else
    908             add_round_key( s1, o_key );
    909         inv_mix_sub_columns( s1 );
    910     }
    911 #else
    912     {   uint_8t s2[N_BLOCK];
    913         if( ( r & 1 ) )
    914         {
    915             update_decrypt_key_256( o_key, &rc );
    916             copy_and_key( s2, s1, o_key + 16 );
    917         }
    918         else
    919             copy_and_key( s2, s1, o_key );
    920         inv_mix_sub_columns( s1, s2 );
    921     }
    922 #endif
    923     copy_and_key( out, s1, o_key );
    924 }
    925 
    926 #endif
    927