Home | History | Annotate | Download | only in smp
      1 /*
      2  ---------------------------------------------------------------------------
      3  Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
      4 
      5  LICENSE TERMS
      6 
      7  The redistribution and use of this software (with or without changes)
      8  is allowed without the payment of fees or royalties provided that:
      9 
     10   1. source code distributions include the above copyright notice, this
     11      list of conditions and the following disclaimer;
     12 
     13   2. binary distributions include the above copyright notice, this list
     14      of conditions and the following disclaimer in their documentation;
     15 
     16   3. the name of the copyright holder is not used to endorse products
     17      built using this software without specific written permission.
     18 
     19  DISCLAIMER
     20 
     21  This software is provided 'as is' with no explicit or implied warranties
     22  in respect of its properties, including, but not limited to, correctness
     23  and/or fitness for purpose.
     24  ---------------------------------------------------------------------------
     25  Issue 09/09/2006
     26 
     27  This is an AES implementation that uses only 8-bit byte operations on the
     28  cipher state (there are options to use 32-bit types if available).
     29 
     30  The combination of mix columns and byte substitution used here is based on
     31  that developed by Karl Malbrain. His contribution is acknowledged.
     32  */
     33 
     34 /* define if you have a fast memcpy function on your system */
     35 #if 1
     36 #define HAVE_MEMCPY
     37 #include <string.h>
     38 #if 0
     39 #if defined(_MSC_VER)
     40 #include <intrin.h>
     41 #pragma intrinsic(memcpy)
     42 #endif
     43 #endif
     44 #endif
     45 
     46 #include <stdlib.h>
     47 
     48 /* add the target configuration to allow using internal data types and
     49  * compilation options */
     50 #include "bt_target.h"
     51 
     52 /* define if you have fast 32-bit types on your system */
     53 #if 1
     54 #define HAVE_UINT_32T
     55 #endif
     56 
     57 /* define to use precomputed lookup tables; leave undefined if you don't want any tables */
     58 #if 1
     59 #define USE_TABLES
     60 #endif
     61 
     62 /*  On Intel Core 2 duo VERSION_1 is faster */
     63 
     64 /* alternative versions (test for performance on your system) */
     65 #if 1
     66 #define VERSION_1
     67 #endif
     68 
     69 #include "aes.h"
     70 
     71 #if defined(HAVE_UINT_32T)
     72 typedef uint32_t uint_32t;
     73 #endif
     74 
     75 /* functions for finite field multiplication in the AES Galois field    */
     76 
     /* Constants used by the field-arithmetic macros below.  WPOLY (0x11b) is
      * XORed in by f2/f4/f8 once for every bit shifted out above bit 7 of a
      * byte-valued argument; BPOLY is the low byte of WPOLY; DPOLY is the
      * value d2() XORs in when the bit shifted out on the right is set. */
     77 #define WPOLY 0x011b
     78 #define BPOLY 0x1b
     79 #define DPOLY 0x008d
     80
     /* fN(x): x multiplied by the constant N in the AES field, built from a
      * left shift plus the WPOLY corrections.  f1 is the identity.  d2(x)
      * shifts right by one and applies DPOLY when bit 0 of x is set.
      * NOTE: these are macros, so x is evaluated several times -- pass only
      * side-effect-free expressions. */
     81 #define f1(x) (x)
     82 #define f2(x) (((x) << 1) ^ ((((x) >> 7) & 1) * WPOLY))
     83 #define f4(x) \
     84   (((x) << 2) ^ ((((x) >> 6) & 1) * WPOLY) ^ ((((x) >> 6) & 2) * WPOLY))
     85 #define f8(x)                                                             \
     86   (((x) << 3) ^ ((((x) >> 5) & 1) * WPOLY) ^ ((((x) >> 5) & 2) * WPOLY) ^ \
     87    ((((x) >> 5) & 4) * WPOLY))
     88 #define d2(x) (((x) >> 1) ^ ((x)&1 ? DPOLY : 0))
     89
     /* Multipliers composed by XORing the power-of-two multiples above:
      * 3 = 2^1, 9 = 8^1, 0xb = 8^2^1, 0xd = 8^4^1, 0xe = 8^4^2. */
     90 #define f3(x) (f2(x) ^ (x))
     91 #define f9(x) (f8(x) ^ (x))
     92 #define fb(x) (f8(x) ^ f2(x) ^ (x))
     93 #define fd(x) (f8(x) ^ f4(x) ^ (x))
     94 #define fe(x) (f8(x) ^ f4(x) ^ f2(x))
     95 
     96 #if defined(USE_TABLES)
     97 
     98 #define sb_data(w)                                                          \
     99   { /* S Box data values */                                                 \
    100     w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5), \
    101         w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab),      \
    102         w(0x76), w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59),      \
    103         w(0x47), w(0xf0), w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c),      \
    104         w(0xa4), w(0x72), w(0xc0), w(0xb7), w(0xfd), w(0x93), w(0x26),      \
    105         w(0x36), w(0x3f), w(0xf7), w(0xcc), w(0x34), w(0xa5), w(0xe5),      \
    106         w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15), w(0x04), w(0xc7),      \
    107         w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a), w(0x07),      \
    108         w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),      \
    109         w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a),      \
    110         w(0xa0), w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3),      \
    111         w(0x2f), w(0x84), w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20),      \
    112         w(0xfc), w(0xb1), w(0x5b), w(0x6a), w(0xcb), w(0xbe), w(0x39),      \
    113         w(0x4a), w(0x4c), w(0x58), w(0xcf), w(0xd0), w(0xef), w(0xaa),      \
    114         w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85), w(0x45), w(0xf9),      \
    115         w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8), w(0x51),      \
    116         w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),      \
    117         w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3),      \
    118         w(0xd2), w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97),      \
    119         w(0x44), w(0x17), w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64),      \
    120         w(0x5d), w(0x19), w(0x73), w(0x60), w(0x81), w(0x4f), w(0xdc),      \
    121         w(0x22), w(0x2a), w(0x90), w(0x88), w(0x46), w(0xee), w(0xb8),      \
    122         w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb), w(0xe0), w(0x32),      \
    123         w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c), w(0xc2),      \
    124         w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),      \
    125         w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e),      \
    126         w(0xa9), w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a),      \
    127         w(0xae), w(0x08), w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c),      \
    128         w(0xa6), w(0xb4), w(0xc6), w(0xe8), w(0xdd), w(0x74), w(0x1f),      \
    129         w(0x4b), w(0xbd), w(0x8b), w(0x8a), w(0x70), w(0x3e), w(0xb5),      \
    130         w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e), w(0x61), w(0x35),      \
    131         w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e), w(0xe1),      \
    132         w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),      \
    133         w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28),      \
    134         w(0xdf), w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6),      \
    135         w(0x42), w(0x68), w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0),      \
    136         w(0x54), w(0xbb), w(0x16)                                           \
    137   }
    138 
    139 #define isb_data(w)                                                         \
    140   { /* inverse S Box data values */                                         \
    141     w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38), \
    142         w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7),      \
    143         w(0xfb), w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f),      \
    144         w(0xff), w(0x87), w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4),      \
    145         w(0xde), w(0xe9), w(0xcb), w(0x54), w(0x7b), w(0x94), w(0x32),      \
    146         w(0xa6), w(0xc2), w(0x23), w(0x3d), w(0xee), w(0x4c), w(0x95),      \
    147         w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e), w(0x08), w(0x2e),      \
    148         w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2), w(0x76),      \
    149         w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),      \
    150         w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98),      \
    151         w(0x16), w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65),      \
    152         w(0xb6), w(0x92), w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd),      \
    153         w(0xed), w(0xb9), w(0xda), w(0x5e), w(0x15), w(0x46), w(0x57),      \
    154         w(0xa7), w(0x8d), w(0x9d), w(0x84), w(0x90), w(0xd8), w(0xab),      \
    155         w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a), w(0xf7), w(0xe4),      \
    156         w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06), w(0xd0),      \
    157         w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),      \
    158         w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a),      \
    159         w(0x6b), w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67),      \
    160         w(0xdc), w(0xea), w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0),      \
    161         w(0xb4), w(0xe6), w(0x73), w(0x96), w(0xac), w(0x74), w(0x22),      \
    162         w(0xe7), w(0xad), w(0x35), w(0x85), w(0xe2), w(0xf9), w(0x37),      \
    163         w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e), w(0x47), w(0xf1),      \
    164         w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89), w(0x6f),      \
    165         w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),      \
    166         w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79),      \
    167         w(0x20), w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd),      \
    168         w(0x5a), w(0xf4), w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88),      \
    169         w(0x07), w(0xc7), w(0x31), w(0xb1), w(0x12), w(0x10), w(0x59),      \
    170         w(0x27), w(0x80), w(0xec), w(0x5f), w(0x60), w(0x51), w(0x7f),      \
    171         w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d), w(0x2d), w(0xe5),      \
    172         w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef), w(0xa0),      \
    173         w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),      \
    174         w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99),      \
    175         w(0x61), w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77),      \
    176         w(0xd6), w(0x26), w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55),      \
    177         w(0x21), w(0x0c), w(0x7d)                                           \
    178   }
    179 
    180 #define mm_data(w)                                                          \
    181   { /* basic data for forming finite field tables */                        \
    182     w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07), \
    183         w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e),      \
    184         w(0x0f), w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15),      \
    185         w(0x16), w(0x17), w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c),      \
    186         w(0x1d), w(0x1e), w(0x1f), w(0x20), w(0x21), w(0x22), w(0x23),      \
    187         w(0x24), w(0x25), w(0x26), w(0x27), w(0x28), w(0x29), w(0x2a),      \
    188         w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f), w(0x30), w(0x31),      \
    189         w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37), w(0x38),      \
    190         w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),      \
    191         w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46),      \
    192         w(0x47), w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d),      \
    193         w(0x4e), w(0x4f), w(0x50), w(0x51), w(0x52), w(0x53), w(0x54),      \
    194         w(0x55), w(0x56), w(0x57), w(0x58), w(0x59), w(0x5a), w(0x5b),      \
    195         w(0x5c), w(0x5d), w(0x5e), w(0x5f), w(0x60), w(0x61), w(0x62),      \
    196         w(0x63), w(0x64), w(0x65), w(0x66), w(0x67), w(0x68), w(0x69),      \
    197         w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f), w(0x70),      \
    198         w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),      \
    199         w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e),      \
    200         w(0x7f), w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85),      \
    201         w(0x86), w(0x87), w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c),      \
    202         w(0x8d), w(0x8e), w(0x8f), w(0x90), w(0x91), w(0x92), w(0x93),      \
    203         w(0x94), w(0x95), w(0x96), w(0x97), w(0x98), w(0x99), w(0x9a),      \
    204         w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f), w(0xa0), w(0xa1),      \
    205         w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7), w(0xa8),      \
    206         w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),      \
    207         w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6),      \
    208         w(0xb7), w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd),      \
    209         w(0xbe), w(0xbf), w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4),      \
    210         w(0xc5), w(0xc6), w(0xc7), w(0xc8), w(0xc9), w(0xca), w(0xcb),      \
    211         w(0xcc), w(0xcd), w(0xce), w(0xcf), w(0xd0), w(0xd1), w(0xd2),      \
    212         w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7), w(0xd8), w(0xd9),      \
    213         w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf), w(0xe0),      \
    214         w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),      \
    215         w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee),      \
    216         w(0xef), w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5),      \
    217         w(0xf6), w(0xf7), w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc),      \
    218         w(0xfd), w(0xfe), w(0xff)                                           \
    219   }
    220 
    /* Precomputed 256-entry lookup tables (table mode).
     * sbox / isbox: the S-box and inverse S-box values as listed (f1 is the
     * identity wrapper).
     * gfm2_sbox / gfm3_sbox: f2() and f3() applied to each S-box value.
     * gfmul_9/b/d/e: f9(), fb(), fd(), fe() applied to every byte 0..255. */
    221 static const uint_8t sbox[256] = sb_data(f1);
    222 static const uint_8t isbox[256] = isb_data(f1);
    223
    224 static const uint_8t gfm2_sbox[256] = sb_data(f2);
    225 static const uint_8t gfm3_sbox[256] = sb_data(f3);
    226
    227 static const uint_8t gfmul_9[256] = mm_data(f9);
    228 static const uint_8t gfmul_b[256] = mm_data(fb);
    229 static const uint_8t gfmul_d[256] = mm_data(fd);
    230 static const uint_8t gfmul_e[256] = mm_data(fe);
    231
    /* Accessors used by the cipher code; in table mode each one is a single
     * array lookup indexed by the byte x. */
    232 #define s_box(x) sbox[(x)]
    233 #define is_box(x) isbox[(x)]
    234 #define gfm2_sb(x) gfm2_sbox[(x)]
    235 #define gfm3_sb(x) gfm3_sbox[(x)]
    236 #define gfm_9(x) gfmul_9[(x)]
    237 #define gfm_b(x) gfmul_b[(x)]
    238 #define gfm_d(x) gfmul_d[(x)]
    239 #define gfm_e(x) gfmul_e[(x)]
    240 
    241 #else
    242 
    243 /* this is the high bit of x right shifted by 1 */
    244 /* position. Since the starting polynomial has  */
    245 /* 9 bits (0x11b), this right shift keeps the   */
    246 /* values of all top bits within a byte         */
    247 
    248 static uint_8t hibit(const uint_8t x) {
    249   uint_8t r = (uint_8t)((x >> 1) | (x >> 2));
    250 
    251   r |= (r >> 2);
    252   r |= (r >> 4);
    253   return (r + 1) >> 1;
    254 }
    255 
    256 /* return the inverse of the finite field element x */
    257 
    /* Returns the field inverse of x (per the comment above: "the inverse of
     * the finite field element x").  0 and 1 are returned unchanged.
     *
     * p1/p2 hold two polynomials (initially x and BPOLY), n1/n2 track their
     * high bits as reported by hibit() (n2 starts at 0x80 for the implicit
     * x^8 term of the 9-bit modulus), and v1/v2 accumulate the result.  The
     * loop alternately reduces p2 by p1 and p1 by p2 until one high-bit
     * marker reaches zero, then returns the accumulator of the other side.
     * NOTE(review): this looks like a binary extended-Euclid division loop --
     * confirm against the reference implementation before modifying. */
    258 static uint_8t gf_inv(const uint_8t x) {
    259   uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
    260
    261   if (x < 2) return x;
    262
    263   for (;;) {
    264     if (n1)
    265       while (n2 >= n1) /* divide polynomial p2 by p1    */
    266       {
    267         n2 /= n1;               /* shift smaller polynomial left */
    268         p2 ^= (p1 * n2) & 0xff; /* and remove from larger one    */
    269         v2 ^= (v1 * n2);        /* shift accumulated value and   */
    270         n2 = hibit(p2);         /* add into result               */
    271       }
    272     else
    273       return v1; /* n1 == 0: the if/else pairs with "if (n1)" above */
    274
    275     if (n2) /* repeat with values swapped    */
    276       while (n1 >= n2) {
    277         n1 /= n2;
    278         p1 ^= p2 * n1;
    279         v1 ^= v2 * n1;
    280         n1 = hibit(p1);
    281       }
    282     else
    283       return v2; /* n2 == 0: the if/else pairs with "if (n2)" above */
    284   }
    285 }
    286 
    287 /* The forward and inverse affine transformations used in the S-box */
    288 uint_8t fwd_affine(const uint_8t x) {
    289 #if defined(HAVE_UINT_32T)
    290   uint_32t w = x;
    291   w ^= (w << 1) ^ (w << 2) ^ (w << 3) ^ (w << 4);
    292   return 0x63 ^ ((w ^ (w >> 8)) & 0xff);
    293 #else
    294   return 0x63 ^ x ^ (x << 1) ^ (x << 2) ^ (x << 3) ^ (x << 4) ^ (x >> 7) ^
    295          (x >> 6) ^ (x >> 5) ^ (x >> 4);
    296 #endif
    297 }
    298 
    299 uint_8t inv_affine(const uint_8t x) {
    300 #if defined(HAVE_UINT_32T)
    301   uint_32t w = x;
    302   w = (w << 1) ^ (w << 3) ^ (w << 6);
    303   return 0x05 ^ ((w ^ (w >> 8)) & 0xff);
    304 #else
    305   return 0x05 ^ (x << 1) ^ (x << 3) ^ (x << 6) ^ (x >> 7) ^ (x >> 5) ^ (x >> 2);
    306 #endif
    307 }
    308 
    /* Table-less accessors: the same names as the table-mode macros, but
     * every value is computed on the fly.  s_box/is_box combine gf_inv()
     * with the affine transforms; the multiplier accessors expand to the
     * f-macros, which evaluate their argument more than once. */
    309 #define s_box(x) fwd_affine(gf_inv(x))
    310 #define is_box(x) gf_inv(inv_affine(x))
    311 #define gfm2_sb(x) f2(s_box(x))
    312 #define gfm3_sb(x) f3(s_box(x))
    313 #define gfm_9(x) f9(x)
    314 #define gfm_b(x) fb(x)
    315 #define gfm_d(x) fd(x)
    316 #define gfm_e(x) fe(x)
    317 
    318 #endif
    319 
    320 #if defined(HAVE_MEMCPY)
    321 #define block_copy_nn(d, s, l) memcpy(d, s, l)
    322 #define block_copy(d, s) memcpy(d, s, N_BLOCK)
    323 #else
    324 #define block_copy_nn(d, s, l) copy_block_nn(d, s, l)
    325 #define block_copy(d, s) copy_block(d, s)
    326 #endif
    327 
    328 #if !defined(HAVE_MEMCPY)
    329 static void copy_block(void* d, const void* s) {
    330 #if defined(HAVE_UINT_32T)
    331   ((uint_32t*)d)[0] = ((uint_32t*)s)[0];
    332   ((uint_32t*)d)[1] = ((uint_32t*)s)[1];
    333   ((uint_32t*)d)[2] = ((uint_32t*)s)[2];
    334   ((uint_32t*)d)[3] = ((uint_32t*)s)[3];
    335 #else
    336   ((uint_8t*)d)[0] = ((uint_8t*)s)[0];
    337   ((uint_8t*)d)[1] = ((uint_8t*)s)[1];
    338   ((uint_8t*)d)[2] = ((uint_8t*)s)[2];
    339   ((uint_8t*)d)[3] = ((uint_8t*)s)[3];
    340   ((uint_8t*)d)[4] = ((uint_8t*)s)[4];
    341   ((uint_8t*)d)[5] = ((uint_8t*)s)[5];
    342   ((uint_8t*)d)[6] = ((uint_8t*)s)[6];
    343   ((uint_8t*)d)[7] = ((uint_8t*)s)[7];
    344   ((uint_8t*)d)[8] = ((uint_8t*)s)[8];
    345   ((uint_8t*)d)[9] = ((uint_8t*)s)[9];
    346   ((uint_8t*)d)[10] = ((uint_8t*)s)[10];
    347   ((uint_8t*)d)[11] = ((uint_8t*)s)[11];
    348   ((uint_8t*)d)[12] = ((uint_8t*)s)[12];
    349   ((uint_8t*)d)[13] = ((uint_8t*)s)[13];
    350   ((uint_8t*)d)[14] = ((uint_8t*)s)[14];
    351   ((uint_8t*)d)[15] = ((uint_8t*)s)[15];
    352 #endif
    353 }
    354 
    355 static void copy_block_nn(void* d, const void* s, uint_8t nn) {
    356   while (nn--) *((uint_8t*)d)++ = *((uint_8t*)s)++;
    357 }
    358 #endif
    359 
    360 static void xor_block(void* d, const void* s) {
    361 #if defined(HAVE_UINT_32T)
    362   ((uint_32t*)d)[0] ^= ((uint_32t*)s)[0];
    363   ((uint_32t*)d)[1] ^= ((uint_32t*)s)[1];
    364   ((uint_32t*)d)[2] ^= ((uint_32t*)s)[2];
    365   ((uint_32t*)d)[3] ^= ((uint_32t*)s)[3];
    366 #else
    367   ((uint_8t*)d)[0] ^= ((uint_8t*)s)[0];
    368   ((uint_8t*)d)[1] ^= ((uint_8t*)s)[1];
    369   ((uint_8t*)d)[2] ^= ((uint_8t*)s)[2];
    370   ((uint_8t*)d)[3] ^= ((uint_8t*)s)[3];
    371   ((uint_8t*)d)[4] ^= ((uint_8t*)s)[4];
    372   ((uint_8t*)d)[5] ^= ((uint_8t*)s)[5];
    373   ((uint_8t*)d)[6] ^= ((uint_8t*)s)[6];
    374   ((uint_8t*)d)[7] ^= ((uint_8t*)s)[7];
    375   ((uint_8t*)d)[8] ^= ((uint_8t*)s)[8];
    376   ((uint_8t*)d)[9] ^= ((uint_8t*)s)[9];
    377   ((uint_8t*)d)[10] ^= ((uint_8t*)s)[10];
    378   ((uint_8t*)d)[11] ^= ((uint_8t*)s)[11];
    379   ((uint_8t*)d)[12] ^= ((uint_8t*)s)[12];
    380   ((uint_8t*)d)[13] ^= ((uint_8t*)s)[13];
    381   ((uint_8t*)d)[14] ^= ((uint_8t*)s)[14];
    382   ((uint_8t*)d)[15] ^= ((uint_8t*)s)[15];
    383 #endif
    384 }
    385 
    386 static void copy_and_key(void* d, const void* s, const void* k) {
    387 #if defined(HAVE_UINT_32T)
    388   ((uint_32t*)d)[0] = ((uint_32t*)s)[0] ^ ((uint_32t*)k)[0];
    389   ((uint_32t*)d)[1] = ((uint_32t*)s)[1] ^ ((uint_32t*)k)[1];
    390   ((uint_32t*)d)[2] = ((uint_32t*)s)[2] ^ ((uint_32t*)k)[2];
    391   ((uint_32t*)d)[3] = ((uint_32t*)s)[3] ^ ((uint_32t*)k)[3];
    392 #elif 1
    393   ((uint_8t*)d)[0] = ((uint_8t*)s)[0] ^ ((uint_8t*)k)[0];
    394   ((uint_8t*)d)[1] = ((uint_8t*)s)[1] ^ ((uint_8t*)k)[1];
    395   ((uint_8t*)d)[2] = ((uint_8t*)s)[2] ^ ((uint_8t*)k)[2];
    396   ((uint_8t*)d)[3] = ((uint_8t*)s)[3] ^ ((uint_8t*)k)[3];
    397   ((uint_8t*)d)[4] = ((uint_8t*)s)[4] ^ ((uint_8t*)k)[4];
    398   ((uint_8t*)d)[5] = ((uint_8t*)s)[5] ^ ((uint_8t*)k)[5];
    399   ((uint_8t*)d)[6] = ((uint_8t*)s)[6] ^ ((uint_8t*)k)[6];
    400   ((uint_8t*)d)[7] = ((uint_8t*)s)[7] ^ ((uint_8t*)k)[7];
    401   ((uint_8t*)d)[8] = ((uint_8t*)s)[8] ^ ((uint_8t*)k)[8];
    402   ((uint_8t*)d)[9] = ((uint_8t*)s)[9] ^ ((uint_8t*)k)[9];
    403   ((uint_8t*)d)[10] = ((uint_8t*)s)[10] ^ ((uint_8t*)k)[10];
    404   ((uint_8t*)d)[11] = ((uint_8t*)s)[11] ^ ((uint_8t*)k)[11];
    405   ((uint_8t*)d)[12] = ((uint_8t*)s)[12] ^ ((uint_8t*)k)[12];
    406   ((uint_8t*)d)[13] = ((uint_8t*)s)[13] ^ ((uint_8t*)k)[13];
    407   ((uint_8t*)d)[14] = ((uint_8t*)s)[14] ^ ((uint_8t*)k)[14];
    408   ((uint_8t*)d)[15] = ((uint_8t*)s)[15] ^ ((uint_8t*)k)[15];
    409 #else
    410   block_copy(d, s);
    411   xor_block(d, k);
    412 #endif
    413 }
    414 
    /* XOR the round-key block k into the state block d in place; a named
     * wrapper around xor_block(). */
    415 static void add_round_key(uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK]) {
    416   xor_block(d, k);
    417 }
    418 
    419 static void shift_sub_rows(uint_8t st[N_BLOCK]) {
    420   uint_8t tt;
    421 
    422   st[0] = s_box(st[0]);
    423   st[4] = s_box(st[4]);
    424   st[8] = s_box(st[8]);
    425   st[12] = s_box(st[12]);
    426 
    427   tt = st[1];
    428   st[1] = s_box(st[5]);
    429   st[5] = s_box(st[9]);
    430   st[9] = s_box(st[13]);
    431   st[13] = s_box(tt);
    432 
    433   tt = st[2];
    434   st[2] = s_box(st[10]);
    435   st[10] = s_box(tt);
    436   tt = st[6];
    437   st[6] = s_box(st[14]);
    438   st[14] = s_box(tt);
    439 
    440   tt = st[15];
    441   st[15] = s_box(st[11]);
    442   st[11] = s_box(st[7]);
    443   st[7] = s_box(st[3]);
    444   st[3] = s_box(tt);
    445 }
    446 
    447 static void inv_shift_sub_rows(uint_8t st[N_BLOCK]) {
    448   uint_8t tt;
    449 
    450   st[0] = is_box(st[0]);
    451   st[4] = is_box(st[4]);
    452   st[8] = is_box(st[8]);
    453   st[12] = is_box(st[12]);
    454 
    455   tt = st[13];
    456   st[13] = is_box(st[9]);
    457   st[9] = is_box(st[5]);
    458   st[5] = is_box(st[1]);
    459   st[1] = is_box(tt);
    460 
    461   tt = st[2];
    462   st[2] = is_box(st[10]);
    463   st[10] = is_box(tt);
    464   tt = st[6];
    465   st[6] = is_box(st[14]);
    466   st[14] = is_box(tt);
    467 
    468   tt = st[3];
    469   st[3] = is_box(st[7]);
    470   st[7] = is_box(st[11]);
    471   st[11] = is_box(st[15]);
    472   st[15] = is_box(tt);
    473 }
    474 
    475 #if defined(VERSION_1)
    476 static void mix_sub_columns(uint_8t dt[N_BLOCK]) {
    477   uint_8t st[N_BLOCK];
    478   block_copy(st, dt);
    479 #else
    480 static void mix_sub_columns(uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK]) {
    481 #endif
    482   dt[0] = gfm2_sb(st[0]) ^ gfm3_sb(st[5]) ^ s_box(st[10]) ^ s_box(st[15]);
    483   dt[1] = s_box(st[0]) ^ gfm2_sb(st[5]) ^ gfm3_sb(st[10]) ^ s_box(st[15]);
    484   dt[2] = s_box(st[0]) ^ s_box(st[5]) ^ gfm2_sb(st[10]) ^ gfm3_sb(st[15]);
    485   dt[3] = gfm3_sb(st[0]) ^ s_box(st[5]) ^ s_box(st[10]) ^ gfm2_sb(st[15]);
    486 
    487   dt[4] = gfm2_sb(st[4]) ^ gfm3_sb(st[9]) ^ s_box(st[14]) ^ s_box(st[3]);
    488   dt[5] = s_box(st[4]) ^ gfm2_sb(st[9]) ^ gfm3_sb(st[14]) ^ s_box(st[3]);
    489   dt[6] = s_box(st[4]) ^ s_box(st[9]) ^ gfm2_sb(st[14]) ^ gfm3_sb(st[3]);
    490   dt[7] = gfm3_sb(st[4]) ^ s_box(st[9]) ^ s_box(st[14]) ^ gfm2_sb(st[3]);
    491 
    492   dt[8] = gfm2_sb(st[8]) ^ gfm3_sb(st[13]) ^ s_box(st[2]) ^ s_box(st[7]);
    493   dt[9] = s_box(st[8]) ^ gfm2_sb(st[13]) ^ gfm3_sb(st[2]) ^ s_box(st[7]);
    494   dt[10] = s_box(st[8]) ^ s_box(st[13]) ^ gfm2_sb(st[2]) ^ gfm3_sb(st[7]);
    495   dt[11] = gfm3_sb(st[8]) ^ s_box(st[13]) ^ s_box(st[2]) ^ gfm2_sb(st[7]);
    496 
    497   dt[12] = gfm2_sb(st[12]) ^ gfm3_sb(st[1]) ^ s_box(st[6]) ^ s_box(st[11]);
    498   dt[13] = s_box(st[12]) ^ gfm2_sb(st[1]) ^ gfm3_sb(st[6]) ^ s_box(st[11]);
    499   dt[14] = s_box(st[12]) ^ s_box(st[1]) ^ gfm2_sb(st[6]) ^ gfm3_sb(st[11]);
    500   dt[15] = gfm3_sb(st[12]) ^ s_box(st[1]) ^ s_box(st[6]) ^ gfm2_sb(st[11]);
    501 }
    502 
    503 #if defined(VERSION_1)
    504 static void inv_mix_sub_columns(uint_8t dt[N_BLOCK]) {
    505   uint_8t st[N_BLOCK];
    506   block_copy(st, dt);
    507 #else
    508 static void inv_mix_sub_columns(uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK]) {
    509 #endif
    510   dt[0] = is_box(gfm_e(st[0]) ^ gfm_b(st[1]) ^ gfm_d(st[2]) ^ gfm_9(st[3]));
    511   dt[5] = is_box(gfm_9(st[0]) ^ gfm_e(st[1]) ^ gfm_b(st[2]) ^ gfm_d(st[3]));
    512   dt[10] = is_box(gfm_d(st[0]) ^ gfm_9(st[1]) ^ gfm_e(st[2]) ^ gfm_b(st[3]));
    513   dt[15] = is_box(gfm_b(st[0]) ^ gfm_d(st[1]) ^ gfm_9(st[2]) ^ gfm_e(st[3]));
    514 
    515   dt[4] = is_box(gfm_e(st[4]) ^ gfm_b(st[5]) ^ gfm_d(st[6]) ^ gfm_9(st[7]));
    516   dt[9] = is_box(gfm_9(st[4]) ^ gfm_e(st[5]) ^ gfm_b(st[6]) ^ gfm_d(st[7]));
    517   dt[14] = is_box(gfm_d(st[4]) ^ gfm_9(st[5]) ^ gfm_e(st[6]) ^ gfm_b(st[7]));
    518   dt[3] = is_box(gfm_b(st[4]) ^ gfm_d(st[5]) ^ gfm_9(st[6]) ^ gfm_e(st[7]));
    519 
    520   dt[8] = is_box(gfm_e(st[8]) ^ gfm_b(st[9]) ^ gfm_d(st[10]) ^ gfm_9(st[11]));
    521   dt[13] = is_box(gfm_9(st[8]) ^ gfm_e(st[9]) ^ gfm_b(st[10]) ^ gfm_d(st[11]));
    522   dt[2] = is_box(gfm_d(st[8]) ^ gfm_9(st[9]) ^ gfm_e(st[10]) ^ gfm_b(st[11]));
    523   dt[7] = is_box(gfm_b(st[8]) ^ gfm_d(st[9]) ^ gfm_9(st[10]) ^ gfm_e(st[11]));
    524 
    525   dt[12] =
    526       is_box(gfm_e(st[12]) ^ gfm_b(st[13]) ^ gfm_d(st[14]) ^ gfm_9(st[15]));
    527   dt[1] = is_box(gfm_9(st[12]) ^ gfm_e(st[13]) ^ gfm_b(st[14]) ^ gfm_d(st[15]));
    528   dt[6] = is_box(gfm_d(st[12]) ^ gfm_9(st[13]) ^ gfm_e(st[14]) ^ gfm_b(st[15]));
    529   dt[11] =
    530       is_box(gfm_b(st[12]) ^ gfm_d(st[13]) ^ gfm_9(st[14]) ^ gfm_e(st[15]));
    531 }
    532 
    533 #if defined(AES_ENC_PREKEYED) || defined(AES_DEC_PREKEYED)
    534 
    535 /*  Set the cipher key for the pre-keyed version */
    536 /*  NOTE: If the length_type used for the key length is an
    537     unsigned 8-bit character, a key length of 256 bits must
    538     be entered as a length in bytes (valid inputs are hence
    539     128, 192, 16, 24 and 32).
    540 */
    541 
    542 return_type aes_set_key(const unsigned char key[], length_type keylen,
    543                         aes_context ctx[1]) {
    544   uint_8t cc, rc, hi;
    545 
    546   switch (keylen) {
    547     case 16:
    548     case 128: /* length in bits (128 = 8*16) */
    549       keylen = 16;
    550       break;
    551     case 24:
    552     case 192: /* length in bits (192 = 8*24) */
    553       keylen = 24;
    554       break;
    555     case 32:
    556       /*    case 256:           length in bits (256 = 8*32) */
    557       keylen = 32;
    558       break;
    559     default:
    560       ctx->rnd = 0;
    561       return (return_type)-1;
    562   }
    563   block_copy_nn(ctx->ksch, key, keylen);
    564   hi = (keylen + 28) << 2;
    565   ctx->rnd = (hi >> 4) - 1;
    566   for (cc = keylen, rc = 1; cc < hi; cc += 4) {
    567     uint_8t tt, t0, t1, t2, t3;
    568 
    569     t0 = ctx->ksch[cc - 4];
    570     t1 = ctx->ksch[cc - 3];
    571     t2 = ctx->ksch[cc - 2];
    572     t3 = ctx->ksch[cc - 1];
    573     if (cc % keylen == 0) {
    574       tt = t0;
    575       t0 = s_box(t1) ^ rc;
    576       t1 = s_box(t2);
    577       t2 = s_box(t3);
    578       t3 = s_box(tt);
    579       rc = f2(rc);
    580     } else if (keylen > 24 && cc % keylen == 16) {
    581       t0 = s_box(t0);
    582       t1 = s_box(t1);
    583       t2 = s_box(t2);
    584       t3 = s_box(t3);
    585     }
    586     tt = cc - keylen;
    587     ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0;
    588     ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1;
    589     ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2;
    590     ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3;
    591   }
    592   return 0;
    593 }
    594 
    595 #endif
    596 
    597 #if defined(AES_ENC_PREKEYED)
    598 
    599 /*  Encrypt a single block of 16 bytes */
    600 
    601 return_type aes_encrypt(const unsigned char in[N_BLOCK],
    602                         unsigned char out[N_BLOCK], const aes_context ctx[1]) {
    603   if (ctx->rnd) {
    604     uint_8t s1[N_BLOCK], r;
    605     copy_and_key(s1, in, ctx->ksch);
    606 
    607     for (r = 1; r < ctx->rnd; ++r)
    608 #if defined(VERSION_1)
    609     {
    610       mix_sub_columns(s1);
    611       add_round_key(s1, ctx->ksch + r * N_BLOCK);
    612     }
    613 #else
    614     {
    615       uint_8t s2[N_BLOCK];
    616       mix_sub_columns(s2, s1);
    617       copy_and_key(s1, s2, ctx->ksch + r * N_BLOCK);
    618     }
    619 #endif
    620     shift_sub_rows(s1);
    621     copy_and_key(out, s1, ctx->ksch + r * N_BLOCK);
    622   } else
    623     return (return_type)-1;
    624   return 0;
    625 }
    626 
    627 /* CBC encrypt a number of blocks (input and return an IV) */
    628 
    629 return_type aes_cbc_encrypt(const unsigned char* in, unsigned char* out,
    630                             int n_block, unsigned char iv[N_BLOCK],
    631                             const aes_context ctx[1]) {
    632   while (n_block--) {
    633     xor_block(iv, in);
    634     if (aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS) return EXIT_FAILURE;
    635     memcpy(out, iv, N_BLOCK);
    636     in += N_BLOCK;
    637     out += N_BLOCK;
    638   }
    639   return EXIT_SUCCESS;
    640 }
    641 
    642 #endif
    643 
    644 #if defined(AES_DEC_PREKEYED)
    645 
    646 /*  Decrypt a single block of 16 bytes */
    647 
    648 return_type aes_decrypt(const unsigned char in[N_BLOCK],
    649                         unsigned char out[N_BLOCK], const aes_context ctx[1]) {
    650   if (ctx->rnd) {
    651     uint_8t s1[N_BLOCK], r;
    652     copy_and_key(s1, in, ctx->ksch + ctx->rnd * N_BLOCK);
    653     inv_shift_sub_rows(s1);
    654 
    655     for (r = ctx->rnd; --r;)
    656 #if defined(VERSION_1)
    657     {
    658       add_round_key(s1, ctx->ksch + r * N_BLOCK);
    659       inv_mix_sub_columns(s1);
    660     }
    661 #else
    662     {
    663       uint_8t s2[N_BLOCK];
    664       copy_and_key(s2, s1, ctx->ksch + r * N_BLOCK);
    665       inv_mix_sub_columns(s1, s2);
    666     }
    667 #endif
    668     copy_and_key(out, s1, ctx->ksch);
    669   } else
    670     return (return_type)-1;
    671   return 0;
    672 }
    673 
    674 /* CBC decrypt a number of blocks (input and return an IV) */
    675 
    676 return_type aes_cbc_decrypt(const unsigned char* in, unsigned char* out,
    677                             int n_block, unsigned char iv[N_BLOCK],
    678                             const aes_context ctx[1]) {
    679   while (n_block--) {
    680     uint_8t tmp[N_BLOCK];
    681 
    682     memcpy(tmp, in, N_BLOCK);
    683     if (aes_decrypt(in, out, ctx) != EXIT_SUCCESS) return EXIT_FAILURE;
    684     xor_block(out, iv);
    685     memcpy(iv, tmp, N_BLOCK);
    686     in += N_BLOCK;
    687     out += N_BLOCK;
    688   }
    689   return EXIT_SUCCESS;
    690 }
    691 
    692 #endif
    693 
    694 #if defined(AES_ENC_128_OTFK)
    695 
    696 /*  The 'on the fly' encryption key update for 128 bit keys */
    697 
    698 static void update_encrypt_key_128(uint_8t k[N_BLOCK], uint_8t* rc) {
    699   uint_8t cc;
    700 
    701   k[0] ^= s_box(k[13]) ^ *rc;
    702   k[1] ^= s_box(k[14]);
    703   k[2] ^= s_box(k[15]);
    704   k[3] ^= s_box(k[12]);
    705   *rc = f2(*rc);
    706 
    707   for (cc = 4; cc < 16; cc += 4) {
    708     k[cc + 0] ^= k[cc - 4];
    709     k[cc + 1] ^= k[cc - 3];
    710     k[cc + 2] ^= k[cc - 2];
    711     k[cc + 3] ^= k[cc - 1];
    712   }
    713 }
    714 
    715 /*  Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
    716 
    717 void aes_encrypt_128(const unsigned char in[N_BLOCK],
    718                      unsigned char out[N_BLOCK],
    719                      const unsigned char key[N_BLOCK],
    720                      unsigned char o_key[N_BLOCK]) {
    721   uint_8t s1[N_BLOCK], r, rc = 1;
    722 
    723   if (o_key != key) block_copy(o_key, key);
    724   copy_and_key(s1, in, o_key);
    725 
    726   for (r = 1; r < 10; ++r)
    727 #if defined(VERSION_1)
    728   {
    729     mix_sub_columns(s1);
    730     update_encrypt_key_128(o_key, &rc);
    731     add_round_key(s1, o_key);
    732   }
    733 #else
    734   {
    735     uint_8t s2[N_BLOCK];
    736     mix_sub_columns(s2, s1);
    737     update_encrypt_key_128(o_key, &rc);
    738     copy_and_key(s1, s2, o_key);
    739   }
    740 #endif
    741 
    742   shift_sub_rows(s1);
    743   update_encrypt_key_128(o_key, &rc);
    744   copy_and_key(out, s1, o_key);
    745 }
    746 
    747 #endif
    748 
    749 #if defined(AES_DEC_128_OTFK)
    750 
    751 /*  The 'on the fly' decryption key update for for 128 bit keys */
    752 
    753 static void update_decrypt_key_128(uint_8t k[N_BLOCK], uint_8t* rc) {
    754   uint_8t cc;
    755 
    756   for (cc = 12; cc > 0; cc -= 4) {
    757     k[cc + 0] ^= k[cc - 4];
    758     k[cc + 1] ^= k[cc - 3];
    759     k[cc + 2] ^= k[cc - 2];
    760     k[cc + 3] ^= k[cc - 1];
    761   }
    762   *rc = d2(*rc);
    763   k[0] ^= s_box(k[13]) ^ *rc;
    764   k[1] ^= s_box(k[14]);
    765   k[2] ^= s_box(k[15]);
    766   k[3] ^= s_box(k[12]);
    767 }
    768 
    769 /*  Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
    770 
    771 void aes_decrypt_128(const unsigned char in[N_BLOCK],
    772                      unsigned char out[N_BLOCK],
    773                      const unsigned char key[N_BLOCK],
    774                      unsigned char o_key[N_BLOCK]) {
    775   uint_8t s1[N_BLOCK], r, rc = 0x6c;
    776   if (o_key != key) block_copy(o_key, key);
    777 
    778   copy_and_key(s1, in, o_key);
    779   inv_shift_sub_rows(s1);
    780 
    781   for (r = 10; --r;)
    782 #if defined(VERSION_1)
    783   {
    784     update_decrypt_key_128(o_key, &rc);
    785     add_round_key(s1, o_key);
    786     inv_mix_sub_columns(s1);
    787   }
    788 #else
    789   {
    790     uint_8t s2[N_BLOCK];
    791     update_decrypt_key_128(o_key, &rc);
    792     copy_and_key(s2, s1, o_key);
    793     inv_mix_sub_columns(s1, s2);
    794   }
    795 #endif
    796   update_decrypt_key_128(o_key, &rc);
    797   copy_and_key(out, s1, o_key);
    798 }
    799 
    800 #endif
    801 
    802 #if defined(AES_ENC_256_OTFK)
    803 
    804 /*  The 'on the fly' encryption key update for for 256 bit keys */
    805 
    806 static void update_encrypt_key_256(uint_8t k[2 * N_BLOCK], uint_8t* rc) {
    807   uint_8t cc;
    808 
    809   k[0] ^= s_box(k[29]) ^ *rc;
    810   k[1] ^= s_box(k[30]);
    811   k[2] ^= s_box(k[31]);
    812   k[3] ^= s_box(k[28]);
    813   *rc = f2(*rc);
    814 
    815   for (cc = 4; cc < 16; cc += 4) {
    816     k[cc + 0] ^= k[cc - 4];
    817     k[cc + 1] ^= k[cc - 3];
    818     k[cc + 2] ^= k[cc - 2];
    819     k[cc + 3] ^= k[cc - 1];
    820   }
    821 
    822   k[16] ^= s_box(k[12]);
    823   k[17] ^= s_box(k[13]);
    824   k[18] ^= s_box(k[14]);
    825   k[19] ^= s_box(k[15]);
    826 
    827   for (cc = 20; cc < 32; cc += 4) {
    828     k[cc + 0] ^= k[cc - 4];
    829     k[cc + 1] ^= k[cc - 3];
    830     k[cc + 2] ^= k[cc - 2];
    831     k[cc + 3] ^= k[cc - 1];
    832   }
    833 }
    834 
    835 /*  Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */
    836 
    837 void aes_encrypt_256(const unsigned char in[N_BLOCK],
    838                      unsigned char out[N_BLOCK],
    839                      const unsigned char key[2 * N_BLOCK],
    840                      unsigned char o_key[2 * N_BLOCK]) {
    841   uint_8t s1[N_BLOCK], r, rc = 1;
    842   if (o_key != key) {
    843     block_copy(o_key, key);
    844     block_copy(o_key + 16, key + 16);
    845   }
    846   copy_and_key(s1, in, o_key);
    847 
    848   for (r = 1; r < 14; ++r)
    849 #if defined(VERSION_1)
    850   {
    851     mix_sub_columns(s1);
    852     if (r & 1)
    853       add_round_key(s1, o_key + 16);
    854     else {
    855       update_encrypt_key_256(o_key, &rc);
    856       add_round_key(s1, o_key);
    857     }
    858   }
    859 #else
    860   {
    861     uint_8t s2[N_BLOCK];
    862     mix_sub_columns(s2, s1);
    863     if (r & 1)
    864       copy_and_key(s1, s2, o_key + 16);
    865     else {
    866       update_encrypt_key_256(o_key, &rc);
    867       copy_and_key(s1, s2, o_key);
    868     }
    869   }
    870 #endif
    871 
    872   shift_sub_rows(s1);
    873   update_encrypt_key_256(o_key, &rc);
    874   copy_and_key(out, s1, o_key);
    875 }
    876 
    877 #endif
    878 
    879 #if defined(AES_DEC_256_OTFK)
    880 
    881 /*  The 'on the fly' encryption key update for for 256 bit keys */
    882 
    883 static void update_decrypt_key_256(uint_8t k[2 * N_BLOCK], uint_8t* rc) {
    884   uint_8t cc;
    885 
    886   for (cc = 28; cc > 16; cc -= 4) {
    887     k[cc + 0] ^= k[cc - 4];
    888     k[cc + 1] ^= k[cc - 3];
    889     k[cc + 2] ^= k[cc - 2];
    890     k[cc + 3] ^= k[cc - 1];
    891   }
    892 
    893   k[16] ^= s_box(k[12]);
    894   k[17] ^= s_box(k[13]);
    895   k[18] ^= s_box(k[14]);
    896   k[19] ^= s_box(k[15]);
    897 
    898   for (cc = 12; cc > 0; cc -= 4) {
    899     k[cc + 0] ^= k[cc - 4];
    900     k[cc + 1] ^= k[cc - 3];
    901     k[cc + 2] ^= k[cc - 2];
    902     k[cc + 3] ^= k[cc - 1];
    903   }
    904 
    905   *rc = d2(*rc);
    906   k[0] ^= s_box(k[29]) ^ *rc;
    907   k[1] ^= s_box(k[30]);
    908   k[2] ^= s_box(k[31]);
    909   k[3] ^= s_box(k[28]);
    910 }
    911 
    912 /*  Decrypt a single block of 16 bytes with 'on the fly'
    913     256 bit keying
    914 */
    915 void aes_decrypt_256(const unsigned char in[N_BLOCK],
    916                      unsigned char out[N_BLOCK],
    917                      const unsigned char key[2 * N_BLOCK],
    918                      unsigned char o_key[2 * N_BLOCK]) {
    919   uint_8t s1[N_BLOCK], r, rc = 0x80;
    920 
    921   if (o_key != key) {
    922     block_copy(o_key, key);
    923     block_copy(o_key + 16, key + 16);
    924   }
    925 
    926   copy_and_key(s1, in, o_key);
    927   inv_shift_sub_rows(s1);
    928 
    929   for (r = 14; --r;)
    930 #if defined(VERSION_1)
    931   {
    932     if ((r & 1)) {
    933       update_decrypt_key_256(o_key, &rc);
    934       add_round_key(s1, o_key + 16);
    935     } else
    936       add_round_key(s1, o_key);
    937     inv_mix_sub_columns(s1);
    938   }
    939 #else
    940   {
    941     uint_8t s2[N_BLOCK];
    942     if ((r & 1)) {
    943       update_decrypt_key_256(o_key, &rc);
    944       copy_and_key(s2, s1, o_key + 16);
    945     } else
    946       copy_and_key(s2, s1, o_key);
    947     inv_mix_sub_columns(s1, s2);
    948   }
    949 #endif
    950   copy_and_key(out, s1, o_key);
    951 }
    952 
    953 #endif
    954