Home | History | Annotate | Download | only in modes
      1 /* ====================================================================
      2  * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  *
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  *
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in
     13  *    the documentation and/or other materials provided with the
     14  *    distribution.
     15  *
     16  * 3. All advertising materials mentioning features or use of this
     17  *    software must display the following acknowledgment:
     18  *    "This product includes software developed by the OpenSSL Project
     19  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
     20  *
     21  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
     22  *    endorse or promote products derived from this software without
     23  *    prior written permission. For written permission, please contact
     24  *    openssl-core (at) openssl.org.
     25  *
     26  * 5. Products derived from this software may not be called "OpenSSL"
     27  *    nor may "OpenSSL" appear in their names without prior written
     28  *    permission of the OpenSSL Project.
     29  *
     30  * 6. Redistributions of any form whatsoever must retain the following
     31  *    acknowledgment:
     32  *    "This product includes software developed by the OpenSSL Project
     33  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
     34  *
     35  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
     36  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     37  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     38  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
     39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     41  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     42  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     44  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     45  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
     46  * OF THE POSSIBILITY OF SUCH DAMAGE.
     47  * ====================================================================
     48  */
     49 
     50 #define OPENSSL_FIPSAPI
     51 
     52 #include <openssl/crypto.h>
     53 #include "modes_lcl.h"
     54 #include <string.h>
     55 
     56 #ifndef MODES_DEBUG
     57 # ifndef NDEBUG
     58 #  define NDEBUG
     59 # endif
     60 #endif
     61 #include <assert.h>
     62 
     63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
     64 /* redefine, because alignment is ensured */
     65 #undef	GETU32
     66 #define	GETU32(p)	BSWAP4(*(const u32 *)(p))
     67 #undef	PUTU32
     68 #define	PUTU32(p,v)	*(u32 *)(p) = BSWAP4(v)
     69 #endif
     70 
/*
 * PACK places a 16-bit constant into the most significant 16 bits of a
 * size_t, so the same table initializers serve both 32-bit and 64-bit
 * size_t builds (see the rem_4bit/rem_8bit reduction tables below).
 */
#define	PACK(s)		((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT performs one shift-right-and-reduce step on the 128-bit
 * value V (a u128 with .hi/.lo 64-bit halves): V = V * x modulo the
 * GCM polynomial, whose bit-reflected representation is
 * 0xE1000000...0.  If the bit shifted out of V.lo is set, the
 * reduction constant is XORed into the top of V.hi.  The
 * sizeof(size_t) test picks a 64-bit or 32-bit-friendly variant at
 * compile time (the condition is constant, so dead code is elided).
 */
#define REDUCE1BIT(V)	do { \
	if (sizeof(size_t)==8) { \
		u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
		V.lo  = (V.hi<<63)|(V.lo>>1); \
		V.hi  = (V.hi>>1 )^T; \
	} \
	else { \
		u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
		V.lo  = (V.hi<<63)|(V.lo>>1); \
		V.hi  = (V.hi>>1 )^((u64)T<<32); \
	} \
} while(0)
     84 
     85 /*
     86  * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
     87  * never be set to 8. 8 is effectively reserved for testing purposes.
     88  * TABLE_BITS>1 are lookup-table-driven implementations referred to as
     89  * "Shoup's" in GCM specification. In other words OpenSSL does not cover
     90  * whole spectrum of possible table driven implementations. Why? In
     91  * non-"Shoup's" case memory access pattern is segmented in such manner,
     92  * that it's trivial to see that cache timing information can reveal
     93  * fair portion of intermediate hash value. Given that ciphertext is
     94  * always available to attacker, it's possible for him to attempt to
     95  * deduce secret parameter H and if successful, tamper with messages
     96  * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
     97  * not as trivial, but there is no reason to believe that it's resistant
     98  * to cache-timing attack. And the thing about "8-bit" implementation is
     99  * that it consumes 16 (sixteen) times more memory, 4KB per individual
    100  * key + 1KB shared. Well, on pros side it should be twice as fast as
    101  * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
    102  * was observed to run ~75% faster, closer to 100% for commercial
    103  * compilers... Yet "4-bit" procedure is preferred, because it's
    104  * believed to provide better security-performance balance and adequate
    105  * all-round performance. "All-round" refers to things like:
    106  *
    107  * - shorter setup time effectively improves overall timing for
    108  *   handling short messages;
    109  * - larger table allocation can become unbearable because of VM
    110  *   subsystem penalties (for example on Windows large enough free
    111  *   results in VM working set trimming, meaning that consequent
    112  *   malloc would immediately incur working set expansion);
    113  * - larger table has larger cache footprint, which can affect
    114  *   performance of other code paths (not necessarily even from same
    115  *   thread in Hyper-Threading world);
    116  *
    117  * Value of 1 is not appropriate for performance reasons.
    118  */
    119 #if	TABLE_BITS==8
    120 
    121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
    122 {
    123 	int  i, j;
    124 	u128 V;
    125 
    126 	Htable[0].hi = 0;
    127 	Htable[0].lo = 0;
    128 	V.hi = H[0];
    129 	V.lo = H[1];
    130 
    131 	for (Htable[128]=V, i=64; i>0; i>>=1) {
    132 		REDUCE1BIT(V);
    133 		Htable[i] = V;
    134 	}
    135 
    136 	for (i=2; i<256; i<<=1) {
    137 		u128 *Hi = Htable+i, H0 = *Hi;
    138 		for (j=1; j<i; ++j) {
    139 			Hi[j].hi = H0.hi^Htable[j].hi;
    140 			Hi[j].lo = H0.lo^Htable[j].lo;
    141 		}
    142 	}
    143 }
    144 
/*
 * Multiply Xi (big-endian 128-bit block) by H in GF(2^128) using the
 * 8-bit table built by gcm_init_8bit, storing the product back into
 * Xi.  Xi is consumed one byte at a time, last byte first.
 */
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
	u128 Z = { 0, 0};
	const u8 *xi = (const u8 *)Xi+15;	/* walk Xi from byte 15 down to byte 0 */
	size_t rem, n = *xi;
	const union { long one; char little; } is_endian = {1};	/* run-time endianness probe */
	/*
	 * rem_8bit[b] is the reduction constant for an 8-bit remainder b,
	 * pre-shifted into the top 16 bits of a size_t by PACK().
	 */
	static const size_t rem_8bit[256] = {
		PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
		PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
		PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
		PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
		PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
		PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
		PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
		PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
		PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
		PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
		PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
		PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
		PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
		PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
		PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
		PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
		PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
		PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
		PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
		PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
		PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
		PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
		PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
		PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
		PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
		PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
		PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
		PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
		PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
		PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
		PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
		PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
		PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
		PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
		PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
		PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
		PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
		PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
		PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
		PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
		PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
		PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
		PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
		PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
		PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
		PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
		PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
		PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
		PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
		PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
		PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
		PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
		PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
		PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
		PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
		PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
		PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
		PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
		PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
		PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
		PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
		PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
		PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
		PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };

	while (1) {
		/* accumulate the table entry selected by the current byte */
		Z.hi ^= Htable[n].hi;
		Z.lo ^= Htable[n].lo;

		if ((u8 *)Xi==xi)	break;	/* all 16 bytes consumed */

		n = *(--xi);

		/* shift Z right by 8 bits; the byte shifted out selects
		 * the reduction constant folded back into the top */
		rem  = (size_t)Z.lo&0xff;
		Z.lo = (Z.hi<<56)|(Z.lo>>8);
		Z.hi = (Z.hi>>8);
		if (sizeof(size_t)==8)
			Z.hi ^= rem_8bit[rem];
		else
			Z.hi ^= (u64)rem_8bit[rem]<<32;
	}

	/* write Z back to Xi in big-endian (network) byte order */
	if (is_endian.little) {
#ifdef BSWAP8
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
#else
		u8 *p = (u8 *)Xi;
		u32 v;
		v = (u32)(Z.hi>>32);	PUTU32(p,v);
		v = (u32)(Z.hi);	PUTU32(p+4,v);
		v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
		v = (u32)(Z.lo);	PUTU32(p+12,v);
#endif
	}
	else {
		Xi[0] = Z.hi;
		Xi[1] = Z.lo;
	}
}
/* Xi = Xi * H, using the per-context 8-bit table. */
#define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
    253 
    254 #elif	TABLE_BITS==4
    255 
/*
 * Build the 16-entry "Shoup" table for 4-bit GHASH:
 * Htable[n] = n * H in GF(2^128), H given in host byte order.
 * The small-footprint path uses loops; the default path unrolls the
 * same construction.  On ARM with assembler GHASH, entries are
 * additionally permuted into the dword order the asm expects.
 */
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
	u128 V;
#if defined(OPENSSL_SMALL_FOOTPRINT)
	int  i;
#endif

	Htable[0].hi = 0;
	Htable[0].lo = 0;
	V.hi = H[0];
	V.lo = H[1];

#if defined(OPENSSL_SMALL_FOOTPRINT)
	/* power-of-two slots: Htable[8]=H, then divide index by 2 per
	 * REDUCE1BIT (multiply by x) */
	for (Htable[8]=V, i=4; i>0; i>>=1) {
		REDUCE1BIT(V);
		Htable[i] = V;
	}

	/* remaining slots by linearity: Htable[i+j] = Htable[i]^Htable[j] */
	for (i=2; i<16; i<<=1) {
		u128 *Hi = Htable+i;
		int   j;
		for (V=*Hi, j=1; j<i; ++j) {
			Hi[j].hi = V.hi^Htable[j].hi;
			Hi[j].lo = V.lo^Htable[j].lo;
		}
	}
#else
	/* unrolled version of the construction above */
	Htable[8] = V;
	REDUCE1BIT(V);
	Htable[4] = V;
	REDUCE1BIT(V);
	Htable[2] = V;
	REDUCE1BIT(V);
	Htable[1] = V;
	Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
	V=Htable[4];
	Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
	Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
	Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
	V=Htable[8];
	Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
	Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
	Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
	Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
	Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
	Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
	Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
#endif
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
	/*
	 * ARM assembler expects specific dword order in Htable.
	 */
	{
	int j;
	const union { long one; char little; } is_endian = {1};

	if (is_endian.little)
		/* little-endian: swap the 64-bit halves */
		for (j=0;j<16;++j) {
			V = Htable[j];
			Htable[j].hi = V.lo;
			Htable[j].lo = V.hi;
		}
	else
		/* big-endian: swap halves and rotate each by 32 bits */
		for (j=0;j<16;++j) {
			V = Htable[j];
			Htable[j].hi = V.lo<<32|V.lo>>32;
			Htable[j].lo = V.hi<<32|V.hi>>32;
		}
	}
#endif
}
    327 
    328 #ifndef GHASH_ASM
/*
 * rem_4bit[r] is the reduction constant for a 4-bit remainder r,
 * pre-shifted into the top 16 bits of a size_t by PACK(); folded in
 * after each 4-bit right shift of the accumulator.
 */
static const size_t rem_4bit[16] = {
	PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
	PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
	PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
	PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
    334 
/*
 * Multiply Xi (big-endian 128-bit block) by H in GF(2^128) using the
 * 16-entry table from gcm_init_4bit, storing the product back into
 * Xi.  Each input byte is split into its high and low nibbles, which
 * index Htable; between nibbles the accumulator is shifted right by
 * 4 bits with the spilled nibble reduced via rem_4bit.
 */
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
	u128 Z;
	int cnt = 15;			/* byte index into Xi, last byte first */
	size_t rem, nlo, nhi;
	const union { long one; char little; } is_endian = {1};

	nlo  = ((const u8 *)Xi)[15];
	nhi  = nlo>>4;
	nlo &= 0xf;

	/* initialise accumulator with the first (low-nibble) entry */
	Z.hi = Htable[nlo].hi;
	Z.lo = Htable[nlo].lo;

	while (1) {
		/* shift right 4 bits, reduce the spilled nibble */
		rem  = (size_t)Z.lo&0xf;
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
		Z.hi = (Z.hi>>4);
		if (sizeof(size_t)==8)
			Z.hi ^= rem_4bit[rem];
		else
			Z.hi ^= (u64)rem_4bit[rem]<<32;

		/* fold in the high-nibble entry */
		Z.hi ^= Htable[nhi].hi;
		Z.lo ^= Htable[nhi].lo;

		if (--cnt<0)		break;	/* processed all 16 bytes */

		nlo  = ((const u8 *)Xi)[cnt];
		nhi  = nlo>>4;
		nlo &= 0xf;

		/* second shift/reduce step of the unrolled-by-two loop */
		rem  = (size_t)Z.lo&0xf;
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
		Z.hi = (Z.hi>>4);
		if (sizeof(size_t)==8)
			Z.hi ^= rem_4bit[rem];
		else
			Z.hi ^= (u64)rem_4bit[rem]<<32;

		/* fold in the low-nibble entry */
		Z.hi ^= Htable[nlo].hi;
		Z.lo ^= Htable[nlo].lo;
	}

	/* write Z back to Xi in big-endian byte order */
	if (is_endian.little) {
#ifdef BSWAP8
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
#else
		u8 *p = (u8 *)Xi;
		u32 v;
		v = (u32)(Z.hi>>32);	PUTU32(p,v);
		v = (u32)(Z.hi);	PUTU32(p+4,v);
		v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
		v = (u32)(Z.lo);	PUTU32(p+12,v);
#endif
	}
	else {
		Xi[0] = Z.hi;
		Xi[1] = Z.lo;
	}
}
    397 
    398 #if !defined(OPENSSL_SMALL_FOOTPRINT)
    399 /*
    400  * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
    401  * details... Compiler-generated code doesn't seem to give any
    402  * performance improvement, at least not on x86[_64]. It's here
    403  * mostly as reference and a placeholder for possible future
    404  * non-trivial optimization[s]...
    405  */
/*
 * Streamed GHASH over `len` bytes of `inp` (len must be a non-zero
 * multiple of 16): for each 16-byte block, Xi ^= block, then
 * Xi = Xi * H, using the 4-bit table.  Equivalent to repeated
 * gcm_gmult_4bit calls, with the XOR folded into the nibble fetch.
 * The disabled #else branch is an alternative with extra
 * pre-processing tables, kept for reference.
 */
static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    const union { long one; char little; } is_endian = {1};

#if 1
    do {
	/* start with the last byte of (Xi ^ inp-block) */
	cnt  = 15;
	nlo  = ((const u8 *)Xi)[15];
	nlo ^= inp[15];
	nhi  = nlo>>4;
	nlo &= 0xf;

	Z.hi = Htable[nlo].hi;
	Z.lo = Htable[nlo].lo;

	while (1) {
		/* shift right 4 bits, reduce the spilled nibble */
		rem  = (size_t)Z.lo&0xf;
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
		Z.hi = (Z.hi>>4);
		if (sizeof(size_t)==8)
			Z.hi ^= rem_4bit[rem];
		else
			Z.hi ^= (u64)rem_4bit[rem]<<32;

		Z.hi ^= Htable[nhi].hi;
		Z.lo ^= Htable[nhi].lo;

		if (--cnt<0)		break;	/* block done */

		/* next byte of (Xi ^ inp-block) */
		nlo  = ((const u8 *)Xi)[cnt];
		nlo ^= inp[cnt];
		nhi  = nlo>>4;
		nlo &= 0xf;

		rem  = (size_t)Z.lo&0xf;
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
		Z.hi = (Z.hi>>4);
		if (sizeof(size_t)==8)
			Z.hi ^= rem_4bit[rem];
		else
			Z.hi ^= (u64)rem_4bit[rem]<<32;

		Z.hi ^= Htable[nlo].hi;
		Z.lo ^= Htable[nlo].lo;
	}
#else
    /*
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
     * [should] give ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize
     * cache footprint...
     */
    u128 Hshr4[16];	/* Htable shifted right by 4 bits */
    u8   Hshl4[16];	/* Htable shifted left  by 4 bits */
    static const unsigned short rem_8bit[256] = {
	0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
	0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
	0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
	0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
	0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
	0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
	0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
	0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
	0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
	0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
	0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
	0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
	0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
	0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
	0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
	0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
	0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
	0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
	0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
	0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
	0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
	0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
	0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
	0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
	0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
	0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
	0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
	0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
	0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
	0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
	0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
	0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
    /*
     * This pre-processing phase slows down procedure by approximately
     * same time as it makes each loop spin faster. In other words
     * single block performance is approximately same as straightforward
     * "4-bit" implementation, and then it goes only faster...
     */
    for (cnt=0; cnt<16; ++cnt) {
	Z.hi = Htable[cnt].hi;
	Z.lo = Htable[cnt].lo;
	Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
	Hshr4[cnt].hi = (Z.hi>>4);
	Hshl4[cnt]    = (u8)(Z.lo<<4);
    }

    do {
	for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
		nlo  = ((const u8 *)Xi)[cnt];
		nlo ^= inp[cnt];
		nhi  = nlo>>4;
		nlo &= 0xf;

		Z.hi ^= Htable[nlo].hi;
		Z.lo ^= Htable[nlo].lo;

		rem = (size_t)Z.lo&0xff;

		Z.lo = (Z.hi<<56)|(Z.lo>>8);
		Z.hi = (Z.hi>>8);

		Z.hi ^= Hshr4[nhi].hi;
		Z.lo ^= Hshr4[nhi].lo;
		Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
	}

	nlo  = ((const u8 *)Xi)[0];
	nlo ^= inp[0];
	nhi  = nlo>>4;
	nlo &= 0xf;

	Z.hi ^= Htable[nlo].hi;
	Z.lo ^= Htable[nlo].lo;

	rem = (size_t)Z.lo&0xf;

	Z.lo = (Z.hi<<60)|(Z.lo>>4);
	Z.hi = (Z.hi>>4);

	Z.hi ^= Htable[nhi].hi;
	Z.lo ^= Htable[nhi].lo;
	Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
#endif

	/* write the updated hash back to Xi in big-endian order */
	if (is_endian.little) {
#ifdef BSWAP8
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
#else
		u8 *p = (u8 *)Xi;
		u32 v;
		v = (u32)(Z.hi>>32);	PUTU32(p,v);
		v = (u32)(Z.hi);	PUTU32(p+4,v);
		v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
		v = (u32)(Z.lo);	PUTU32(p+12,v);
#endif
	}
	else {
		Xi[0] = Z.hi;
		Xi[1] = Z.lo;
	}
    } while (inp+=16, len-=16);	/* caller guarantees len%16==0 and len>0 */
}
    568 #endif
    569 #else
    570 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
    571 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
    572 #endif
    573 
    574 #define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
    575 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
    576 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
    577 /* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
    578  * trashing effect. In other words idea is to hash data while it's
    579  * still in L1 cache after encryption pass... */
    580 #define GHASH_CHUNK       (3*1024)
    581 #endif
    582 
    583 #else	/* TABLE_BITS */
    584 
/*
 * Table-free, bit-serial GF(2^128) multiplication: Xi = Xi * H.
 * H is in host byte order; Xi is read and written big-endian.  Each
 * bit of Xi (MSB first, one `long` word at a time) conditionally
 * accumulates the running power of H, which is advanced with
 * REDUCE1BIT per bit.  Slowest option; used only for TABLE_BITS==1.
 */
static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
{
	u128 V,Z = { 0,0 };
	long X;
	int  i,j;
	const long *xi = (const long *)Xi;
	const union { long one; char little; } is_endian = {1};

	V.hi = H[0];	/* H is in host byte order, no byte swapping */
	V.lo = H[1];

	for (j=0; j<16/sizeof(long); ++j) {
		/* load the j-th `long` of Xi as a host-order value */
		if (is_endian.little) {
			if (sizeof(long)==8) {
#ifdef BSWAP8
				X = (long)(BSWAP8(xi[j]));
#else
				const u8 *p = (const u8 *)(xi+j);
				X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
#endif
			}
			else {
				const u8 *p = (const u8 *)(xi+j);
				X = (long)GETU32(p);
			}
		}
		else
			X = xi[j];

		for (i=0; i<8*sizeof(long); ++i, X<<=1) {
			/* M is all-ones if the current MSB of X is set,
			 * all-zeros otherwise (branch-free accumulate) */
			u64 M = (u64)(X>>(8*sizeof(long)-1));
			Z.hi ^= V.hi&M;
			Z.lo ^= V.lo&M;

			REDUCE1BIT(V);
		}
	}

	/* write Z back to Xi in big-endian byte order */
	if (is_endian.little) {
#ifdef BSWAP8
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
#else
		u8 *p = (u8 *)Xi;
		u32 v;
		v = (u32)(Z.hi>>32);	PUTU32(p,v);
		v = (u32)(Z.hi);	PUTU32(p+4,v);
		v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
		v = (u32)(Z.lo);	PUTU32(p+12,v);
#endif
	}
	else {
		Xi[0] = Z.hi;
		Xi[1] = Z.lo;
	}
}
/* Xi = Xi * H using the bit-serial routine (no table). */
#define GCM_MUL(ctx,Xi)	  gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
    642 
    643 #endif
    644 
    645 #if	TABLE_BITS==4 && defined(GHASH_ASM)
    646 # if	!defined(I386_ONLY) && \
    647 	(defined(__i386)	|| defined(__i386__)	|| \
    648 	 defined(__x86_64)	|| defined(__x86_64__)	|| \
    649 	 defined(_M_IX86)	|| defined(_M_AMD64)	|| defined(_M_X64))
    650 #  define GHASH_ASM_X86_OR_64
    651 #  define GCM_FUNCREF_4BIT
    652 extern unsigned int OPENSSL_ia32cap_P[2];
    653 
    654 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
    655 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
    656 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
    657 
    658 #  if	defined(__i386) || defined(__i386__) || defined(_M_IX86)
    659 #   define GHASH_ASM_X86
    660 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
    661 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
    662 
    663 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
    664 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
    665 #  endif
    666 # elif defined(__arm__) || defined(__arm)
    667 #  include "arm_arch.h"
    668 #  if __ARM_ARCH__>=7
    669 #   define GHASH_ASM_ARM
    670 #   define GCM_FUNCREF_4BIT
    671 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
    672 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
    673 #  endif
    674 # endif
    675 #endif
    676 
    677 #ifdef GCM_FUNCREF_4BIT
    678 # undef  GCM_MUL
    679 # define GCM_MUL(ctx,Xi)	(*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
    680 # ifdef GHASH
    681 #  undef  GHASH
    682 #  define GHASH(ctx,in,len)	(*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
    683 # endif
    684 #endif
    685 
/*
 * Initialise a GCM context: zeroes the state, records the block
 * cipher and key, computes the hash subkey H = E_K(0^128) in host
 * byte order, and selects/initialises the fastest available GHASH
 * implementation (PCLMULQDQ, MMX/SSE, NEON, or the portable tables)
 * for this build and CPU.
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
{
	const union { long one; char little; } is_endian = {1};

	memset(ctx,0,sizeof(*ctx));
	ctx->block = block;
	ctx->key   = key;

	/* H = E_K(0^128); ctx->H.c was zeroed by the memset above */
	(*block)(ctx->H.c,ctx->H.c,key);

	if (is_endian.little) {
		/* H is stored in host byte order */
#ifdef BSWAP8
		ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
		ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
		u8 *p = ctx->H.c;
		u64 hi,lo;
		hi = (u64)GETU32(p)  <<32|GETU32(p+4);
		lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
		ctx->H.u[0] = hi;
		ctx->H.u[1] = lo;
#endif
	}

#if	TABLE_BITS==8
	gcm_init_8bit(ctx->Htable,ctx->H.u);
#elif	TABLE_BITS==4
# if	defined(GHASH_ASM_X86_OR_64)
#  if	!defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
	if (OPENSSL_ia32cap_P[0]&(1<<24) &&	/* check FXSR bit */
	    OPENSSL_ia32cap_P[1]&(1<<1) ) {	/* check PCLMULQDQ bit */
		gcm_init_clmul(ctx->Htable,ctx->H.u);
		ctx->gmult = gcm_gmult_clmul;
		ctx->ghash = gcm_ghash_clmul;
		return;
	}
#  endif
	gcm_init_4bit(ctx->Htable,ctx->H.u);
#  if	defined(GHASH_ASM_X86)			/* x86 only */
#   if defined(OPENSSL_IA32_SSE2)
	if (OPENSSL_ia32cap_P[0]&(1<<25)) {	/* check SSE bit */
#   else
	if (OPENSSL_ia32cap_P[0]&(1<<23)) {	/* check MMX bit */
#   endif
		ctx->gmult = gcm_gmult_4bit_mmx;
		ctx->ghash = gcm_ghash_4bit_mmx;
	} else {
		ctx->gmult = gcm_gmult_4bit_x86;
		ctx->ghash = gcm_ghash_4bit_x86;
	}
#  else
	ctx->gmult = gcm_gmult_4bit;
	ctx->ghash = gcm_ghash_4bit;
#  endif
# elif	defined(GHASH_ASM_ARM)
	if (OPENSSL_armcap_P & ARMV7_NEON) {
		ctx->gmult = gcm_gmult_neon;
		ctx->ghash = gcm_ghash_neon;
	} else {
		gcm_init_4bit(ctx->Htable,ctx->H.u);
		ctx->gmult = gcm_gmult_4bit;
		ctx->ghash = gcm_ghash_4bit;
	}
# else
	gcm_init_4bit(ctx->Htable,ctx->H.u);
# endif
#endif
}
    755 
/*
 * Set the IV and reset all per-message state (Yi, Xi, lengths,
 * partial-block residues).  Per NIST SP 800-38D: a 96-bit IV is used
 * directly as J0 = IV || 0^31 || 1, any other length is GHASHed
 * (IV blocks, then the 64-bit IV bit-length).  Finally EK0 = E_K(J0)
 * is precomputed for the tag, and the counter is advanced past J0.
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
{
	const union { long one; char little; } is_endian = {1};
	unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
#endif

	ctx->Yi.u[0]  = 0;
	ctx->Yi.u[1]  = 0;
	ctx->Xi.u[0]  = 0;
	ctx->Xi.u[1]  = 0;
	ctx->len.u[0] = 0;	/* AAD length */
	ctx->len.u[1] = 0;	/* message length */
	ctx->ares = 0;
	ctx->mres = 0;

	if (len==12) {
		/* recommended 96-bit IV: J0 = IV || 0^31 || 1 */
		memcpy(ctx->Yi.c,iv,12);
		ctx->Yi.c[15]=1;
		ctr=1;
	}
	else {
		/* arbitrary-length IV: J0 = GHASH(IV || pad || len(IV)) */
		size_t i;
		u64 len0 = len;

		while (len>=16) {
			for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
			GCM_MUL(ctx,Yi);
			iv += 16;
			len -= 16;
		}
		if (len) {
			/* final partial IV block, zero-padded implicitly */
			for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
			GCM_MUL(ctx,Yi);
		}
		len0 <<= 3;	/* IV length in bits */
		if (is_endian.little) {
#ifdef BSWAP8
			ctx->Yi.u[1]  ^= BSWAP8(len0);
#else
			ctx->Yi.c[8]  ^= (u8)(len0>>56);
			ctx->Yi.c[9]  ^= (u8)(len0>>48);
			ctx->Yi.c[10] ^= (u8)(len0>>40);
			ctx->Yi.c[11] ^= (u8)(len0>>32);
			ctx->Yi.c[12] ^= (u8)(len0>>24);
			ctx->Yi.c[13] ^= (u8)(len0>>16);
			ctx->Yi.c[14] ^= (u8)(len0>>8);
			ctx->Yi.c[15] ^= (u8)(len0);
#endif
		}
		else
			ctx->Yi.u[1]  ^= len0;

		GCM_MUL(ctx,Yi);

		/* extract the 32-bit counter from the last word of J0 */
		if (is_endian.little)
			ctr = GETU32(ctx->Yi.c+12);
		else
			ctr = ctx->Yi.d[3];
	}

	/* EK0 = E_K(J0), used later to mask the tag */
	(*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
	++ctr;
	/* store the incremented counter back into Yi (big-endian) */
	if (is_endian.little)
		PUTU32(ctx->Yi.c+12,ctr);
	else
		ctx->Yi.d[3] = ctr;
}
    825 
/*
 * Feed `len` bytes of additional authenticated data into the GHASH.
 * May be called repeatedly, but only before any en/decryption.
 * Returns 0 on success, -1 if the cumulative AAD length exceeds the
 * GCM limit (2^64 bits, i.e. 2^61 bytes) or overflows, and -2 if
 * message data has already been processed.
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
{
	size_t i;
	unsigned int n;
	u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

	/* AAD must precede all message data */
	if (ctx->len.u[1]) return -2;

	alen += len;
	if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
		return -1;
	ctx->len.u[0] = alen;

	/* complete any partial block left over from a previous call */
	n = ctx->ares;
	if (n) {
		while (n && len) {
			ctx->Xi.c[n] ^= *(aad++);
			--len;
			n = (n+1)%16;
		}
		if (n==0) GCM_MUL(ctx,Xi);
		else {
			/* still partial; stash residue count and wait */
			ctx->ares = n;
			return 0;
		}
	}

#ifdef GHASH
	/* bulk-process all whole 16-byte blocks in one call */
	if ((i = (len&(size_t)-16))) {
		GHASH(ctx,aad,i);
		aad += i;
		len -= i;
	}
#else
	while (len>=16) {
		for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
		GCM_MUL(ctx,Xi);
		aad += 16;
		len -= 16;
	}
#endif
	/* XOR in the trailing partial block; hashed on the next call
	 * (or by finalization) once it fills up */
	if (len) {
		n = (unsigned int)len;
		for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
	}

	ctx->ares = n;
	return 0;
}
    882 
/*
 * GCM encrypt: CTR-mode encrypt |len| bytes from |in| to |out| and fold
 * the resulting ciphertext into the GHASH state.  May be called multiple
 * times to process a message in pieces; ctx->mres records how many bytes
 * of the current key-stream block have already been consumed.
 *
 * Returns 0 on success, -1 if the total message length would exceed
 * 2^36-32 bytes (the GCM limit) or overflows a 64-bit counter.
 */
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len)
{
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64        mlen  = ctx->len.u[1];
	block128_f block = ctx->block;
	void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

#if 0
	n = (unsigned int)mlen%16; /* alternative to ctx->mres */
#endif
	mlen += len;
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to encrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	/* Load the 32-bit big-endian counter from the last word of Yi. */
	if (is_endian.little)
		ctr = GETU32(ctx->Yi.c+12);
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
	if (16%sizeof(size_t) == 0) do {	/* always true actually */
		/* Drain leftover key-stream bytes from the previous call. */
		if (n) {
			while (n && len) {
				ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
				--len;
				n = (n+1)%16;
			}
			if (n==0) GCM_MUL(ctx,Xi);
			else {
				ctx->mres = n;
				return 0;
			}
		}
#if defined(STRICT_ALIGNMENT)
		/* word-at-a-time path requires aligned in/out pointers */
		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
			break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
		/* Encrypt a whole chunk, then hash the ciphertext in one
		 * GHASH call (hash-after-encrypt). */
		while (len>=GHASH_CHUNK) {
		    size_t j=GHASH_CHUNK;

		    while (j) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16; i+=sizeof(size_t))
				*(size_t *)(out+i) =
				*(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
			out += 16;
			in  += 16;
			j   -= 16;
		    }
		    GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
		    len -= GHASH_CHUNK;
		}
		/* Remaining whole blocks, hashed in a single GHASH call. */
		if ((i = (len&(size_t)-16))) {
		    size_t j=i;

		    while (len>=16) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16; i+=sizeof(size_t))
				*(size_t *)(out+i) =
				*(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
			out += 16;
			in  += 16;
			len -= 16;
		    }
		    GHASH(ctx,out-j,j);
		}
#else
		/* No bulk GHASH: hash each block right after encrypting. */
		while (len>=16) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16; i+=sizeof(size_t))
				*(size_t *)(ctx->Xi.c+i) ^=
				*(size_t *)(out+i) =
				*(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
			GCM_MUL(ctx,Xi);
			out += 16;
			in  += 16;
			len -= 16;
		}
#endif
		/* Trailing partial block: XOR into Xi now, multiply later. */
		if (len) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			while (len--) {
				ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
				++n;
			}
		}

		ctx->mres = n;
		return 0;
	} while(0);
#endif
	/* Byte-at-a-time fallback (small footprint, or unaligned buffers
	 * under STRICT_ALIGNMENT). */
	for (i=0;i<len;++i) {
		if (n==0) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
		}
		ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
		n = (n+1)%16;
		if (n==0)
			GCM_MUL(ctx,Xi);
	}

	ctx->mres = n;
	return 0;
}
   1032 
/*
 * GCM decrypt: fold |len| bytes of ciphertext from |in| into the GHASH
 * state and CTR-mode decrypt them into |out|.  Note the order relative
 * to encrypt: here the *input* (ciphertext) is hashed, and hashing is
 * done before or alongside decryption.  May be called multiple times;
 * ctx->mres records consumed bytes of the current key-stream block.
 *
 * Returns 0 on success, -1 if the total message length would exceed
 * 2^36-32 bytes or overflows a 64-bit counter.
 */
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len)
{
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64        mlen  = ctx->len.u[1];
	block128_f block = ctx->block;
	void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

	mlen += len;
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to decrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	/* Load the 32-bit big-endian counter from the last word of Yi. */
	if (is_endian.little)
		ctr = GETU32(ctx->Yi.c+12);
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
	if (16%sizeof(size_t) == 0) do {	/* always true actually */
		/* Drain leftover key-stream bytes from the previous call;
		 * the ciphertext byte must be saved before out is written
		 * because in and out may alias (in-place decryption). */
		if (n) {
			while (n && len) {
				u8 c = *(in++);
				*(out++) = c^ctx->EKi.c[n];
				ctx->Xi.c[n] ^= c;
				--len;
				n = (n+1)%16;
			}
			if (n==0) GCM_MUL (ctx,Xi);
			else {
				ctx->mres = n;
				return 0;
			}
		}
#if defined(STRICT_ALIGNMENT)
		/* word-at-a-time path requires aligned in/out pointers */
		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
			break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
		/* Hash the ciphertext chunk first, then decrypt it. */
		while (len>=GHASH_CHUNK) {
		    size_t j=GHASH_CHUNK;

		    GHASH(ctx,in,GHASH_CHUNK);
		    while (j) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16; i+=sizeof(size_t))
				*(size_t *)(out+i) =
				*(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
			out += 16;
			in  += 16;
			j   -= 16;
		    }
		    len -= GHASH_CHUNK;
		}
		/* Remaining whole blocks: hash once, then decrypt. */
		if ((i = (len&(size_t)-16))) {
		    GHASH(ctx,in,i);
		    while (len>=16) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16; i+=sizeof(size_t))
				*(size_t *)(out+i) =
				*(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
			out += 16;
			in  += 16;
			len -= 16;
		    }
		}
#else
		/* No bulk GHASH: hash each ciphertext block as it goes. */
		while (len>=16) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16; i+=sizeof(size_t)) {
				size_t c = *(size_t *)(in+i);
				*(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
				*(size_t *)(ctx->Xi.c+i) ^= c;
			}
			GCM_MUL(ctx,Xi);
			out += 16;
			in  += 16;
			len -= 16;
		}
#endif
		/* Trailing partial block: XOR into Xi now, multiply later. */
		if (len) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			while (len--) {
				u8 c = in[n];
				ctx->Xi.c[n] ^= c;
				out[n] = c^ctx->EKi.c[n];
				++n;
			}
		}

		ctx->mres = n;
		return 0;
	} while(0);
#endif
	/* Byte-at-a-time fallback (small footprint, or unaligned buffers
	 * under STRICT_ALIGNMENT). */
	for (i=0;i<len;++i) {
		u8 c;
		if (n==0) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
		}
		c = in[i];
		out[i] = c^ctx->EKi.c[n];
		ctx->Xi.c[n] ^= c;
		n = (n+1)%16;
		if (n==0)
			GCM_MUL(ctx,Xi);
	}

	ctx->mres = n;
	return 0;
}
   1185 
/*
 * GCM encrypt using a caller-supplied bulk CTR primitive (|stream|),
 * e.g. a hardware-accelerated AES-CTR routine.  |stream| encrypts a run
 * of whole 16-byte blocks at once; this function then hashes the
 * produced ciphertext and handles partial-block residue itself via
 * ctx->block.  Semantics otherwise match CRYPTO_gcm128_encrypt.
 *
 * Returns 0 on success, -1 if the total message length would exceed
 * 2^36-32 bytes or overflows a 64-bit counter.
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len, ctr128_f stream)
{
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64   mlen = ctx->len.u[1];
	void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

	mlen += len;
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to encrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	/* Load the 32-bit big-endian counter from the last word of Yi. */
	if (is_endian.little)
		ctr = GETU32(ctx->Yi.c+12);
	else
		ctr = ctx->Yi.d[3];

	/* Drain leftover key-stream bytes from the previous call. */
	n = ctx->mres;
	if (n) {
		while (n && len) {
			ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
			--len;
			n = (n+1)%16;
		}
		if (n==0) GCM_MUL(ctx,Xi);
		else {
			ctx->mres = n;
			return 0;
		}
	}
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
	/* Bulk path: stream-encrypt a chunk, then hash the ciphertext. */
	while (len>=GHASH_CHUNK) {
		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
		ctr += GHASH_CHUNK/16;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		GHASH(ctx,out,GHASH_CHUNK);
		out += GHASH_CHUNK;
		in  += GHASH_CHUNK;
		len -= GHASH_CHUNK;
	}
#endif
	/* Remaining whole blocks. */
	if ((i = (len&(size_t)-16))) {
		size_t j=i/16;

		(*stream)(in,out,j,key,ctx->Yi.c);
		ctr += (unsigned int)j;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		in  += i;
		len -= i;
#if defined(GHASH)
		GHASH(ctx,out,i);
		out += i;
#else
		while (j--) {
			for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
			GCM_MUL(ctx,Xi);
			out += 16;
		}
#endif
	}
	/* Trailing partial block: generate one key-stream block with the
	 * scalar cipher and buffer the residue in ctx->mres. */
	if (len) {
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
		++ctr;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		while (len--) {
			ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
			++n;
		}
	}

	ctx->mres = n;
	return 0;
}
   1284 
/*
 * GCM decrypt using a caller-supplied bulk CTR primitive (|stream|).
 * The ciphertext is hashed *before* being stream-decrypted, mirroring
 * CRYPTO_gcm128_decrypt.  Partial-block residue is handled with the
 * scalar ctx->block cipher and tracked in ctx->mres.
 *
 * Returns 0 on success, -1 if the total message length would exceed
 * 2^36-32 bytes or overflows a 64-bit counter.
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len,ctr128_f stream)
{
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64   mlen = ctx->len.u[1];
	void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

	mlen += len;
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to decrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	/* Load the 32-bit big-endian counter from the last word of Yi. */
	if (is_endian.little)
		ctr = GETU32(ctx->Yi.c+12);
	else
		ctr = ctx->Yi.d[3];

	/* Drain leftover key-stream bytes; save the ciphertext byte first
	 * because in and out may alias (in-place decryption). */
	n = ctx->mres;
	if (n) {
		while (n && len) {
			u8 c = *(in++);
			*(out++) = c^ctx->EKi.c[n];
			ctx->Xi.c[n] ^= c;
			--len;
			n = (n+1)%16;
		}
		if (n==0) GCM_MUL (ctx,Xi);
		else {
			ctx->mres = n;
			return 0;
		}
	}
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
	/* Bulk path: hash the ciphertext chunk, then stream-decrypt it. */
	while (len>=GHASH_CHUNK) {
		GHASH(ctx,in,GHASH_CHUNK);
		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
		ctr += GHASH_CHUNK/16;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		out += GHASH_CHUNK;
		in  += GHASH_CHUNK;
		len -= GHASH_CHUNK;
	}
#endif
	/* Remaining whole blocks. */
	if ((i = (len&(size_t)-16))) {
		size_t j=i/16;

#if defined(GHASH)
		GHASH(ctx,in,i);
#else
		while (j--) {
			size_t k;
			for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
			GCM_MUL(ctx,Xi);
			in += 16;
		}
		/* rewind: the stream cipher below re-reads the same input */
		j   = i/16;
		in -= i;
#endif
		(*stream)(in,out,j,key,ctx->Yi.c);
		ctr += (unsigned int)j;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		out += i;
		in  += i;
		len -= i;
	}
	/* Trailing partial block via the scalar cipher. */
	if (len) {
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
		++ctr;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		while (len--) {
			u8 c = in[n];
			ctx->Xi.c[n] ^= c;
			out[n] = c^ctx->EKi.c[n];
			++n;
		}
	}

	ctx->mres = n;
	return 0;
}
   1390 
   1391 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
   1392 			size_t len)
   1393 {
   1394 	const union { long one; char little; } is_endian = {1};
   1395 	u64 alen = ctx->len.u[0]<<3;
   1396 	u64 clen = ctx->len.u[1]<<3;
   1397 #ifdef GCM_FUNCREF_4BIT
   1398 	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
   1399 #endif
   1400 
   1401 	if (ctx->mres)
   1402 		GCM_MUL(ctx,Xi);
   1403 
   1404 	if (is_endian.little) {
   1405 #ifdef BSWAP8
   1406 		alen = BSWAP8(alen);
   1407 		clen = BSWAP8(clen);
   1408 #else
   1409 		u8 *p = ctx->len.c;
   1410 
   1411 		ctx->len.u[0] = alen;
   1412 		ctx->len.u[1] = clen;
   1413 
   1414 		alen = (u64)GETU32(p)  <<32|GETU32(p+4);
   1415 		clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
   1416 #endif
   1417 	}
   1418 
   1419 	ctx->Xi.u[0] ^= alen;
   1420 	ctx->Xi.u[1] ^= clen;
   1421 	GCM_MUL(ctx,Xi);
   1422 
   1423 	ctx->Xi.u[0] ^= ctx->EK0.u[0];
   1424 	ctx->Xi.u[1] ^= ctx->EK0.u[1];
   1425 
   1426 	if (tag && len<=sizeof(ctx->Xi))
   1427 		return memcmp(ctx->Xi.c,tag,len);
   1428 	else
   1429 		return -1;
   1430 }
   1431 
   1432 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
   1433 {
   1434 	CRYPTO_gcm128_finish(ctx, NULL, 0);
   1435 	memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
   1436 }
   1437 
   1438 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
   1439 {
   1440 	GCM128_CONTEXT *ret;
   1441 
   1442 	if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
   1443 		CRYPTO_gcm128_init(ret,key,block);
   1444 
   1445 	return ret;
   1446 }
   1447 
   1448 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
   1449 {
   1450 	if (ctx) {
   1451 		OPENSSL_cleanse(ctx,sizeof(*ctx));
   1452 		OPENSSL_free(ctx);
   1453 	}
   1454 }
   1455 
   1456 #if defined(SELFTEST)
   1457 #include <stdio.h>
   1458 #include <openssl/aes.h>
   1459 
/* Test Case 1: AES-128, all-zero key, 96-bit zero IV, no plaintext, no AAD.
 * Cases 1-18 are the standard GCM validation vectors; K=key, P=plaintext,
 * A=AAD, IV=nonce, C=expected ciphertext, T=expected tag. */
   1461 static const u8	K1[16],
   1462 		*P1=NULL,
   1463 		*A1=NULL,
   1464 		IV1[12],
   1465 		*C1=NULL,
   1466 		T1[]=  {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
   1467 
   1468 /* Test Case 2 */
   1469 #define K2 K1
   1470 #define A2 A1
   1471 #define IV2 IV1
   1472 static const u8	P2[16],
   1473 		C2[]=  {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
   1474 		T2[]=  {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
   1475 
   1476 /* Test Case 3 */
   1477 #define A3 A2
   1478 static const u8	K3[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
   1479 		P3[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
   1480 			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
   1481 			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
   1482 			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
   1483 		IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
   1484 		C3[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
   1485 			0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
   1486 			0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
   1487 			0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
   1488 		T3[]=  {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
   1489 
   1490 /* Test Case 4 */
   1491 #define K4 K3
   1492 #define IV4 IV3
   1493 static const u8	P4[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
   1494 			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
   1495 			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
   1496 			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
   1497 		A4[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
   1498 			0xab,0xad,0xda,0xd2},
   1499 		C4[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
   1500 			0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
   1501 			0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
   1502 			0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
   1503 		T4[]=  {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
   1504 
   1505 /* Test Case 5 */
   1506 #define K5 K4
   1507 #define P5 P4
   1508 #define A5 A4
   1509 static const u8	IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
   1510 		C5[]=  {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
   1511 			0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
   1512 			0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
   1513 			0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
   1514 		T5[]=  {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
   1515 
   1516 /* Test Case 6 */
   1517 #define K6 K5
   1518 #define P6 P5
   1519 #define A6 A5
   1520 static const u8	IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
   1521 			0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
   1522 			0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
   1523 			0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
   1524 		C6[]=  {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
   1525 			0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
   1526 			0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
   1527 			0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
   1528 		T6[]=  {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
   1529 
/* Test Case 7: first AES-192 vector (24-byte key), empty plaintext and AAD */
   1531 static const u8 K7[24],
   1532 		*P7=NULL,
   1533 		*A7=NULL,
   1534 		IV7[12],
   1535 		*C7=NULL,
   1536 		T7[]=  {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
   1537 
   1538 /* Test Case 8 */
   1539 #define K8 K7
   1540 #define IV8 IV7
   1541 #define A8 A7
   1542 static const u8	P8[16],
   1543 		C8[]=  {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
   1544 		T8[]=  {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
   1545 
   1546 /* Test Case 9 */
   1547 #define A9 A8
   1548 static const u8	K9[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
   1549 			0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
   1550 		P9[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
   1551 			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
   1552 			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
   1553 			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
   1554 		IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
   1555 		C9[]=  {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
   1556 			0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
   1557 			0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
   1558 			0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
   1559 		T9[]=  {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
   1560 
   1561 /* Test Case 10 */
   1562 #define K10 K9
   1563 #define IV10 IV9
   1564 static const u8	P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
   1565 			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
   1566 			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
   1567 			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
   1568 		A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
   1569 			0xab,0xad,0xda,0xd2},
   1570 		C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
   1571 			0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
   1572 			0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
   1573 			0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
   1574 		T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
   1575 
   1576 /* Test Case 11 */
   1577 #define K11 K10
   1578 #define P11 P10
   1579 #define A11 A10
   1580 static const u8	IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
   1581 		C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
   1582 			0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
   1583 			0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
   1584 			0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
   1585 		T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
   1586 
   1587 /* Test Case 12 */
   1588 #define K12 K11
   1589 #define P12 P11
   1590 #define A12 A11
   1591 static const u8	IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
   1592 			0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
   1593 			0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
   1594 			0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
   1595 		C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
   1596 			0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
   1597 			0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
   1598 			0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
   1599 		T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
   1600 
/* Test Case 13: first AES-256 vector (32-byte key), empty plaintext and AAD */
   1602 static const u8	K13[32],
   1603 		*P13=NULL,
   1604 		*A13=NULL,
   1605 		IV13[12],
   1606 		*C13=NULL,
   1607 		T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
   1608 
   1609 /* Test Case 14 */
   1610 #define K14 K13
   1611 #define A14 A13
   1612 static const u8	P14[16],
   1613 		IV14[12],
   1614 		C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
   1615 		T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
   1616 
   1617 /* Test Case 15 */
   1618 #define A15 A14
   1619 static const u8	K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
   1620 			0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
   1621 		P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
   1622 			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
   1623 			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
   1624 			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
   1625 		IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
   1626 		C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
   1627 			0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
   1628 			0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
   1629 			0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
   1630 		T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
   1631 
   1632 /* Test Case 16 */
   1633 #define K16 K15
   1634 #define IV16 IV15
   1635 static const u8	P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
   1636 			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
   1637 			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
   1638 			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
   1639 		A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
   1640 			0xab,0xad,0xda,0xd2},
   1641 		C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
   1642 			0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
   1643 			0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
   1644 			0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
   1645 		T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
   1646 
   1647 /* Test Case 17 */
   1648 #define K17 K16
   1649 #define P17 P16
   1650 #define A17 A16
   1651 static const u8	IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
   1652 		C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
   1653 			0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
   1654 			0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
   1655 			0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
   1656 		T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
   1657 
   1658 /* Test Case 18 */
   1659 #define K18 K17
   1660 #define P18 P17
   1661 #define A18 A17
   1662 static const u8	IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
   1663 			0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
   1664 			0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
   1665 			0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
   1666 		C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
   1667 			0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
   1668 			0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
   1669 			0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
   1670 		T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
   1671 
/*
 * Run validation vector |n| in both directions: encrypt P##n and check
 * the output against C##n and the tag T##n, then decrypt C##n and check
 * against P##n/T##n.  On any mismatch, increments 'ret' and prints which
 * direction failed.  Relies on 'ctx', 'key' and 'ret' in caller scope;
 * NULL P/A/C pointers denote absent plaintext/AAD/ciphertext.
 */
#define TEST_CASE(n)	do {					\
	u8 out[sizeof(P##n)];					\
	AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);		\
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);	\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	memset(out,0,sizeof(out));				\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (C##n && memcmp(out,C##n,sizeof(out))))		\
		ret++, printf ("encrypt test#%d failed.\n",n);	\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	memset(out,0,sizeof(out));				\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (P##n && memcmp(out,P##n,sizeof(out))))		\
		ret++, printf ("decrypt test#%d failed.\n",n);	\
	} while(0)
   1691 
/*
 * SELFTEST driver: runs the 18 GCM validation vectors, then (when
 * OPENSSL_CPUID_OBJ is available) prints rough per-byte cycle counts
 * for GCM vs plain CTR and for GHASH alone.  Returns the number of
 * failed test cases (0 on full success).
 */
int main()
{
	GCM128_CONTEXT ctx;
	AES_KEY key;
	int ret=0;

	TEST_CASE(1);
	TEST_CASE(2);
	TEST_CASE(3);
	TEST_CASE(4);
	TEST_CASE(5);
	TEST_CASE(6);
	TEST_CASE(7);
	TEST_CASE(8);
	TEST_CASE(9);
	TEST_CASE(10);
	TEST_CASE(11);
	TEST_CASE(12);
	TEST_CASE(13);
	TEST_CASE(14);
	TEST_CASE(15);
	TEST_CASE(16);
	TEST_CASE(17);
	TEST_CASE(18);

#ifdef OPENSSL_CPUID_OBJ
	{
	/* NOTE(review): 'stop' is declared but never used. */
	size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
	union { u64 u; u8 c[1024]; } buf;
	int i;

	AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
	CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));

	/* warm-up call, then timed call */
	CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
	start = OPENSSL_rdtsc();
	CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
	gcm_t = OPENSSL_rdtsc() - start;

	CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
			&key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
			(block128_f)AES_encrypt);
	start = OPENSSL_rdtsc();
	CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
			&key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
			(block128_f)AES_encrypt);
	ctr_t = OPENSSL_rdtsc() - start;

	/* gcm cycles/byte - ctr cycles/byte = ghash overhead per byte */
	printf("%.2f-%.2f=%.2f\n",
			gcm_t/(double)sizeof(buf),
			ctr_t/(double)sizeof(buf),
			(gcm_t-ctr_t)/(double)sizeof(buf));
#ifdef GHASH
	GHASH(&ctx,buf.c,sizeof(buf));
	start = OPENSSL_rdtsc();
	for (i=0;i<100;++i) GHASH(&ctx,buf.c,sizeof(buf));
	gcm_t = OPENSSL_rdtsc() - start;
	printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
#endif
	}
#endif

	return ret;
}
   1757 #endif
   1758