/* ====================================================================
 * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 */

#define OPENSSL_FIPSAPI

#include <openssl/crypto.h>
#include "modes_lcl.h"
#include <string.h>

#ifndef MODES_DEBUG
# ifndef NDEBUG
#  define NDEBUG
# endif
#endif
#include <assert.h>

#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
#undef  GETU32
#define GETU32(p)       BSWAP4(*(const u32 *)(p))
#undef  PUTU32
#define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
#endif

#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V)   do { \
    if (sizeof(size_t)==8) { \
        u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
        V.lo  = (V.hi<<63)|(V.lo>>1); \
        V.hi  = (V.hi>>1 )^T; \
    } \
    else { \
        u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
        V.lo  = (V.hi<<63)|(V.lo>>1); \
        V.hi  = (V.hi>>1 )^((u64)T<<32); \
    } \
} while(0)
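/*
 * For reference: REDUCE1BIT above is the multiply-by-x step in the
 * bit-reflected GF(2^128) representation used by GHASH; the 0xe1 in
 * the top byte is the reflected image of the reduction polynomial
 * x^128+x^7+x^2+x+1.  A minimal standalone sketch of the 64-bit branch
 * follows (illustrative only, not compiled into the library):
 */
#if 0
static void reduce1bit_ref(u128 *V)
{
    u64 carry = V->lo&1;                /* bit shifted out of the low word */
    V->lo = (V->hi<<63)|(V->lo>>1);     /* 128-bit right shift by one bit  */
    V->hi = (V->hi>>1)^(carry ? U64(0xe100000000000000) : 0);
}
#endif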
/*
 * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it
 * should never be set to 8: 8 is effectively reserved for testing
 * purposes.  TABLE_BITS>1 selects the lookup-table-driven
 * implementations referred to as "Shoup's" in the GCM specification;
 * in other words OpenSSL does not cover the whole spectrum of possible
 * table-driven implementations.  Why?  In the non-"Shoup's" case the
 * memory access pattern is segmented in such a manner that it's
 * trivial to see that cache-timing information can reveal a fair
 * portion of the intermediate hash value.  Given that the ciphertext
 * is always available to an attacker, it's possible to attempt to
 * deduce the secret parameter H and, if successful, to tamper with
 * messages [which is nothing but trivial in CTR mode].  In the
 * "Shoup's" case it's not as trivial, but there is no reason to
 * believe that it's resistant to cache-timing attacks either.  The
 * thing about the "8-bit" implementation is that it consumes 16
 * (sixteen) times more memory, 4KB per individual key + 1KB shared.
 * On the plus side it should be twice as fast as the "4-bit" version,
 * and for gcc-generated x86[_64] code the "8-bit" version was observed
 * to run ~75% faster, closer to 100% for commercial compilers...  Yet
 * the "4-bit" procedure is preferred, because it's believed to provide
 * a better security-performance balance and adequate all-round
 * performance.  "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows a large enough free
 *   operation results in VM working-set trimming, meaning that a
 *   subsequent malloc would immediately incur working-set expansion);
 * - a larger table has a larger cache footprint, which can affect the
 *   performance of other code paths (not necessarily even from the
 *   same thread in a Hyper-Threading world);
 *
 * A value of 1 is not appropriate for performance reasons.
 */
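/*
 * Worked footprint arithmetic for the comment above (a sketch only,
 * assuming u128 is a pair of u64 words, i.e. 16 bytes with no
 * padding):
 *
 *   TABLE_BITS==8: 256*sizeof(u128) = 4096 bytes per key, plus the
 *                  shared rem_8bit[256] = 256*sizeof(size_t) bytes
 *                  (1KB on 32-bit, 2KB on 64-bit targets);
 *   TABLE_BITS==4: 16*sizeof(u128) = 256 bytes per key, plus the
 *                  shared rem_4bit[16] = 16*sizeof(size_t) bytes.
 */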
#if TABLE_BITS==8

static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
    int  i, j;
    u128 V;

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

    for (Htable[128]=V, i=64; i>0; i>>=1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i=2; i<256; i<<=1) {
        u128 *Hi = Htable+i, H0 = *Hi;
        for (j=1; j<i; ++j) {
            Hi[j].hi = H0.hi^Htable[j].hi;
            Hi[j].lo = H0.lo^Htable[j].lo;
        }
    }
}

static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    u128 Z = { 0, 0 };
    const u8 *xi = (const u8 *)Xi+15;
    size_t rem, n = *xi;
    const union { long one; char little; } is_endian = {1};
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };

    while (1) {
        Z.hi ^= Htable[n].hi;
        Z.lo ^= Htable[n].lo;

        if ((u8 *)Xi==xi)   break;

        n = *(--xi);

        rem  = (size_t)Z.lo&0xff;
        Z.lo = (Z.hi<<56)|(Z.lo>>8);
        Z.hi = (Z.hi>>8);
        if (sizeof(size_t)==8)
            Z.hi ^= rem_8bit[rem];
        else
            Z.hi ^= (u64)rem_8bit[rem]<<32;
    }

    if (is_endian.little) {
#ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi>>32);    PUTU32(p,v);
        v = (u32)(Z.hi);        PUTU32(p+4,v);
        v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
        v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
    }
    else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
#define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)

#elif TABLE_BITS==4

static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 V;
#if defined(OPENSSL_SMALL_FOOTPRINT)
    int  i;
#endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

#if defined(OPENSSL_SMALL_FOOTPRINT)
    for (Htable[8]=V, i=4; i>0; i>>=1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i=2; i<16; i<<=1) {
        u128 *Hi = Htable+i;
        int   j;
        for (V=*Hi, j=1; j<i; ++j) {
            Hi[j].hi = V.hi^Htable[j].hi;
            Hi[j].lo = V.lo^Htable[j].lo;
        }
    }
#else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
    V=Htable[4];
    Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
    Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
    Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
    V=Htable[8];
    Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
    Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
    Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
    Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
    Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
    Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
    Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
#endif
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
    int j;
    const union { long one; char little; } is_endian = {1};

    if (is_endian.little)
        for (j=0;j<16;++j) {
            V = Htable[j];
            Htable[j].hi = V.lo;
            Htable[j].lo = V.hi;
        }
    else
        for (j=0;j<16;++j) {
            V = Htable[j];
            Htable[j].hi = V.lo<<32|V.lo>>32;
            Htable[j].lo = V.hi<<32|V.hi>>32;
        }
    }
#endif
}

#ifndef GHASH_ASM
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };

static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    const union { long one; char little; } is_endian = {1};

    nlo  = ((const u8 *)Xi)[15];
    nhi  = nlo>>4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        rem  = (size_t)Z.lo&0xf;
        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);
        if (sizeof(size_t)==8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem]<<32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt<0)    break;

        nlo  = ((const u8 *)Xi)[cnt];
        nhi  = nlo>>4;
        nlo &= 0xf;

        rem  = (size_t)Z.lo&0xf;
        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);
        if (sizeof(size_t)==8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem]<<32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    if (is_endian.little) {
#ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi>>32);    PUTU32(p,v);
        v = (u32)(Z.hi);        PUTU32(p+4,v);
        v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
        v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
    }
    else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

#if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt
 * for details...  Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64].  It's here mostly
 * as a reference and a placeholder for possible future non-trivial
 * optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                           const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    const union { long one; char little; } is_endian = {1};

#if 1
    do {
        cnt  = 15;
        nlo  = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem  = (size_t)Z.lo&0xf;
            Z.lo = (Z.hi<<60)|(Z.lo>>4);
            Z.hi = (Z.hi>>4);
            if (sizeof(size_t)==8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem]<<32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt<0)    break;

            nlo  = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi  = nlo>>4;
            nlo &= 0xf;

            rem  = (size_t)Z.lo&0xf;
            Z.lo = (Z.hi<<60)|(Z.lo>>4);
            Z.hi = (Z.hi>>4);
            if (sizeof(size_t)==8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem]<<32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }
#else
    /*
     * Extra 256+16 bytes per key plus 512 bytes of shared tables
     * [should] give ~50% improvement...
     * One could have PACK()-ed the rem_8bit even here, but the
     * priority is to minimize the cache footprint...
     */
    u128 Hshr4[16];                 /* Htable shifted right by 4 bits */
    u8   Hshl4[16];                 /* Htable shifted left  by 4 bits */
    static const unsigned short rem_8bit[256] = {
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
    /*
     * This pre-processing phase slows the procedure down by
     * approximately the same amount of time as it makes each loop spin
     * faster.  In other words single-block performance is
     * approximately the same as for the straightforward "4-bit"
     * implementation, and from there on it only gets faster...
     */
    for (cnt=0; cnt<16; ++cnt) {
        Z.hi = Htable[cnt].hi;
        Z.lo = Htable[cnt].lo;
        Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
        Hshr4[cnt].hi = (Z.hi>>4);
        Hshl4[cnt]    = (u8)(Z.lo<<4);
    }

    do {
        for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
            nlo  = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi  = nlo>>4;
            nlo &= 0xf;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;

            rem = (size_t)Z.lo&0xff;

            Z.lo = (Z.hi<<56)|(Z.lo>>8);
            Z.hi = (Z.hi>>8);

            Z.hi ^= Hshr4[nhi].hi;
            Z.lo ^= Hshr4[nhi].lo;
            Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
        }

        nlo  = ((const u8 *)Xi)[0];
        nlo ^= inp[0];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo&0xf;

        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;
        Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
#endif

        if (is_endian.little) {
#ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
#else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi>>32);    PUTU32(p,v);
            v = (u32)(Z.hi);        PUTU32(p+4,v);
            v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
            v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
        }
        else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }
    } while (inp+=16, len-=16);
}
#endif
#else
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
#endif

#define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is a "stride parameter" whose mission is to mitigate
 * cache-trashing effects.  In other words the idea is to hash the data
 * while it's still in L1 cache after the encryption pass...
 */
#define GHASH_CHUNK       (3*1024)
#endif
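/*
 * Shape of the chunked loop that GHASH_CHUNK drives (an illustrative
 * sketch only, not compiled in; the real loops live in
 * CRYPTO_gcm128_encrypt/decrypt below, and ctr32_encrypt_chunk is a
 * hypothetical stand-in for counter-mode encryption of one chunk):
 */
#if 0
    while (len >= GHASH_CHUNK) {
        /* encrypt a chunk, then hash the freshly written output
         * while it is still resident in L1 cache */
        ctr32_encrypt_chunk(ctx, in, out, GHASH_CHUNK);
        GHASH(ctx, out, GHASH_CHUNK);
        in  += GHASH_CHUNK;
        out += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#endif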
#else   /* TABLE_BITS */

static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
{
    u128 V, Z = { 0, 0 };
    long X;
    int  i, j;
    const long *xi = (const long *)Xi;
    const union { long one; char little; } is_endian = {1};

    V.hi = H[0];    /* H is in host byte order, no byte swapping */
    V.lo = H[1];

    for (j=0; j<16/sizeof(long); ++j) {
        if (is_endian.little) {
            if (sizeof(long)==8) {
#ifdef BSWAP8
                X = (long)(BSWAP8(xi[j]));
#else
                const u8 *p = (const u8 *)(xi+j);
                X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
#endif
            }
            else {
                const u8 *p = (const u8 *)(xi+j);
                X = (long)GETU32(p);
            }
        }
        else
            X = xi[j];

        for (i=0; i<8*sizeof(long); ++i, X<<=1) {
            u64 M = (u64)(X>>(8*sizeof(long)-1));
            Z.hi ^= V.hi&M;
            Z.lo ^= V.lo&M;

            REDUCE1BIT(V);
        }
    }

    if (is_endian.little) {
#ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi>>32);    PUTU32(p,v);
        v = (u32)(Z.hi);        PUTU32(p+4,v);
        v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
        v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
    }
    else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
#define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)

#endif

#if TABLE_BITS==4 && defined(GHASH_ASM)
# if !defined(I386_ONLY) && \
     (defined(__i386)   || defined(__i386__)   || \
      defined(__x86_64) || defined(__x86_64__) || \
      defined(_M_IX86)  || defined(_M_AMD64)   || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_ia32cap_P[2];

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
#  endif
# elif defined(__arm__) || defined(__arm)
#  include "arm_arch.h"
#  if __ARM_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
#  endif
# endif
#endif

#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx,Xi)        (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
# ifdef  GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
# endif
#endif

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    const union { long one; char little; } is_endian = {1};

    memset(ctx,0,sizeof(*ctx));
    ctx->block = block;
    ctx->key   = key;

    (*block)(ctx->H.c,ctx->H.c,key);

    if (is_endian.little) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p)  <<32|GETU32(p+4);
        lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }

#if TABLE_BITS==8
    gcm_init_8bit(ctx->Htable,ctx->H.u);
#elif TABLE_BITS==4
# if defined(GHASH_ASM_X86_OR_64)
#  if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0]&(1<<24) &&   /* check FXSR bit */
        OPENSSL_ia32cap_P[1]&(1<<1) ) {   /* check PCLMULQDQ bit */
        gcm_init_clmul(ctx->Htable,ctx->H.u);
        ctx->gmult = gcm_gmult_clmul;
        ctx->ghash = gcm_ghash_clmul;
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable,ctx->H.u);
#  if defined(GHASH_ASM_X86)              /* x86 only */
#   if defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0]&(1<<25)) {   /* check SSE bit */
#   else
    if (OPENSSL_ia32cap_P[0]&(1<<23)) {   /* check MMX bit */
#   endif
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        ctx->ghash = gcm_ghash_4bit_x86;
    }
#  else
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
#  endif
# elif defined(GHASH_ASM_ARM)
    if (OPENSSL_armcap_P & ARMV7_NEON) {
        ctx->gmult = gcm_gmult_neon;
        ctx->ghash = gcm_ghash_neon;
    } else {
        gcm_init_4bit(ctx->Htable,ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
    }
# else
    gcm_init_4bit(ctx->Htable,ctx->H.u);
# endif
#endif
}

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, size_t len)
{
    const union { long one; char little; } is_endian = {1};
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
#endif

    ctx->Yi.u[0]  = 0;
    ctx->Yi.u[1]  = 0;
    ctx->Xi.u[0]  = 0;
    ctx->Xi.u[1]  = 0;
    ctx->len.u[0] = 0;  /* AAD length */
    ctx->len.u[1] = 0;  /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len==12) {
        memcpy(ctx->Yi.c,iv,12);
        ctx->Yi.c[15] = 1;
        ctr = 1;
    }
    else {
        size_t i;
        u64 len0 = len;

        while (len>=16) {
            for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
            GCM_MUL(ctx,Yi);
            iv  += 16;
            len -= 16;
        }
        if (len) {
            for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
            GCM_MUL(ctx,Yi);
        }
        len0 <<= 3;
        if (is_endian.little) {
#ifdef BSWAP8
            ctx->Yi.u[1]  ^= BSWAP8(len0);
#else
            ctx->Yi.c[8]  ^= (u8)(len0>>56);
            ctx->Yi.c[9]  ^= (u8)(len0>>48);
            ctx->Yi.c[10] ^= (u8)(len0>>40);
            ctx->Yi.c[11] ^= (u8)(len0>>32);
            ctx->Yi.c[12] ^= (u8)(len0>>24);
            ctx->Yi.c[13] ^= (u8)(len0>>16);
            ctx->Yi.c[14] ^= (u8)(len0>>8);
            ctx->Yi.c[15] ^= (u8)(len0);
#endif
        }
        else
            ctx->Yi.u[1]  ^= len0;

        GCM_MUL(ctx,Yi);

        if (is_endian.little)
            ctr = GETU32(ctx->Yi.c+12);
        else
            ctr = ctx->Yi.d[3];
    }

    (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
    ++ctr;
    if (is_endian.little)
        PUTU32(ctx->Yi.c+12,ctr);
    else
        ctx->Yi.d[3] = ctr;
}

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                        const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1]) return -2;

    alen += len;
    if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n+1)%16;
        }
        if (n==0) GCM_MUL(ctx,Xi);
        else {
            ctx->ares = n;
            return 0;
        }
    }

#ifdef GHASH
    if ((i = (len&(size_t)-16))) {
        GHASH(ctx,aad,i);
        aad += i;
        len -= i;
    }
#else
    while (len>=16) {
        for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx,Xi);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        n = (unsigned int)len;
        for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union { long one; char little; } is_endian = {1};
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                        const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

#if 0
    n = (unsigned int)mlen%16; /* alternative to ctx->mres */
#endif
    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

    if (is_endian.little)
        ctr = GETU32(ctx->Yi.c+12);
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16%sizeof(size_t) == 0) do {    /* always true actually */
        if (n) {
            while (n && len) {
                ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
                --len;
                n = (n+1)%16;
            }
            if (n==0) GCM_MUL(ctx,Xi);
            else {
                ctx->mres = n;
                return 0;
            }
        }
#if defined(STRICT_ALIGNMENT)
        if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
            break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
        while (len>=GHASH_CHUNK) {
            size_t j=GHASH_CHUNK;

            while (j) {
                (*block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
                    PUTU32(ctx->Yi.c+12,ctr);
                else
                    ctx->Yi.d[3] = ctr;
                for (i=0; i<16; i+=sizeof(size_t))
                    *(size_t *)(out+i) =
                    *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
                out += 16;
                in  += 16;
                j   -= 16;
            }
            GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
            len -= GHASH_CHUNK;
        }
        if ((i = (len&(size_t)-16))) {
            size_t j=i;

            while (len>=16) {
                (*block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
                    PUTU32(ctx->Yi.c+12,ctr);
                else
                    ctx->Yi.d[3] = ctr;
                for (i=0; i<16; i+=sizeof(size_t))
                    *(size_t *)(out+i) =
                    *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
                out += 16;
                in  += 16;
                len -= 16;
            }
            GHASH(ctx,out-j,j);
        }
#else
        while (len>=16) {
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
                PUTU32(ctx->Yi.c+12,ctr);
            else
                ctx->Yi.d[3] = ctr;
            for (i=0; i<16; i+=sizeof(size_t))
                *(size_t *)(ctx->Xi.c+i) ^=
                *(size_t *)(out+i) =
                *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
            GCM_MUL(ctx,Xi);
            out += 16;
            in  += 16;
            len -= 16;
        }
#endif
        if (len) {
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
                PUTU32(ctx->Yi.c+12,ctr);
            else
                ctx->Yi.d[3] = ctr;
            while (len--) {
                ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
                ++n;
            }
        }

        ctx->mres = n;
        return 0;
    } while(0);
#endif
    for (i=0;i<len;++i) {
        if (n==0) {
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
                PUTU32(ctx->Yi.c+12,ctr);
            else
                ctx->Yi.d[3] = ctr;
        }
        ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
        n = (n+1)%16;
        if (n==0)
            GCM_MUL(ctx,Xi);
    }

    ctx->mres = n;
    return 0;
}

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union { long one; char little; } is_endian = {1};
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                        const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

    if (is_endian.little)
        ctr = GETU32(ctx->Yi.c+12);
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16%sizeof(size_t) == 0) do {    /* always true actually */
        if (n) {
            while (n && len) {
                u8 c = *(in++);
                *(out++) = c^ctx->EKi.c[n];
                ctx->Xi.c[n] ^= c;
                --len;
                n = (n+1)%16;
            }
            if (n==0) GCM_MUL(ctx,Xi);
            else {
                ctx->mres = n;
                return 0;
            }
        }
#if defined(STRICT_ALIGNMENT)
        if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
            break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
        while (len>=GHASH_CHUNK) {
            size_t j=GHASH_CHUNK;

            GHASH(ctx,in,GHASH_CHUNK);
            while (j) {
                (*block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
                    PUTU32(ctx->Yi.c+12,ctr);
                else
                    ctx->Yi.d[3] = ctr;
                for (i=0; i<16; i+=sizeof(size_t))
                    *(size_t *)(out+i) =
                    *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
                out += 16;
                in  += 16;
                j   -= 16;
            }
            len -= GHASH_CHUNK;
        }
        if ((i = (len&(size_t)-16))) {
            GHASH(ctx,in,i);
            while (len>=16) {
                (*block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
                    PUTU32(ctx->Yi.c+12,ctr);
                else
                    ctx->Yi.d[3] = ctr;
                for (i=0; i<16; i+=sizeof(size_t))
                    *(size_t *)(out+i) =
                    *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
                out += 16;
                in  += 16;
                len -= 16;
            }
        }
#else
        while (len>=16) {
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
                PUTU32(ctx->Yi.c+12,ctr);
            else
                ctx->Yi.d[3] = ctr;
            for (i=0; i<16; i+=sizeof(size_t)) {
                size_t c = *(size_t *)(in+i);
                *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
                *(size_t *)(ctx->Xi.c+i) ^= c;
            }
            GCM_MUL(ctx,Xi);
            out += 16;
            in  += 16;
            len -= 16;
        }
#endif
        if (len) {
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
                PUTU32(ctx->Yi.c+12,ctr);
            else
                ctx->Yi.d[3] = ctr;
            while (len--) {
                u8 c = in[n];
                ctx->Xi.c[n] ^= c;
                out[n] = c^ctx->EKi.c[n];
                ++n;
            }
        }

        ctx->mres = n;
        return 0;
    } while(0);
#endif
    for (i=0;i<len;++i) {
        u8 c;
        if (n==0) {
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
                PUTU32(ctx->Yi.c+12,ctr);
            else
                ctx->Yi.d[3] = ctr;
        }
        c = in[i];
        out[i] = c^ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        n = (n+1)%16;
        if (n==0)
            GCM_MUL(ctx,Xi);
    }

    ctx->mres = n;
    return 0;
}

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
    const union { long one; char little; } is_endian = {1};
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                        const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

    if (is_endian.little)
        ctr = GETU32(ctx->Yi.c+12);
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
            --len;
            n = (n+1)%16;
        }
        if (n==0) GCM_MUL(ctx,Xi);
        else {
            ctx->mres = n;
            return 0;
        }
    }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    while (len>=GHASH_CHUNK) {
        (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
        ctr += GHASH_CHUNK/16;
        if (is_endian.little)
            PUTU32(ctx->Yi.c+12,ctr);
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx,out,GHASH_CHUNK);
        out += GHASH_CHUNK;
        in  += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#endif
    if ((i = (len&(size_t)-16))) {
        size_t j=i/16;

        (*stream)(in,out,j,key,ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
            PUTU32(ctx->Yi.c+12,ctr);
        else
            ctx->Yi.d[3] = ctr;
        in  += i;
        len -= i;
#if defined(GHASH)
        GHASH(ctx,out,i);
        out += i;
#else
        while (j--) {
            for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx,Xi);
            out += 16;
        }
#endif
    }
    if (len) {
        (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
        ++ctr;
        if (is_endian.little)
            PUTU32(ctx->Yi.c+12,ctr);
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
            ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
            ++n;
        }
    }

    ctx->mres = n;
    return 0;
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
    const union { long one; char little; } is_endian = {1};
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                        const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

    if (is_endian.little)
        ctr = GETU32(ctx->Yi.c+12);
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
    if (n) {
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c^ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n+1)%16;
        }
        if (n==0) GCM_MUL(ctx,Xi);
        else {
            ctx->mres = n;
            return 0;
        }
    }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    while (len>=GHASH_CHUNK) {
        GHASH(ctx,in,GHASH_CHUNK);
        (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
        ctr += GHASH_CHUNK/16;
        if (is_endian.little)
            PUTU32(ctx->Yi.c+12,ctr);
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in  += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#endif
    if ((i = (len&(size_t)-16))) {
        size_t j=i/16;

#if defined(GHASH)
        GHASH(ctx,in,i);
#else
        while (j--) {
            size_t k;
            for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx,Xi);
            in += 16;
        }
        j = i/16;
        in -= i;
#endif
        (*stream)(in,out,j,key,ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
            PUTU32(ctx->Yi.c+12,ctr);
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in  += i;
        len -= i;
    }
    if (len) {
        (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
        ++ctr;
        if (is_endian.little)
            PUTU32(ctx->Yi.c+12,ctr);
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
            u8 c = in[n];
            ctx->Xi.c[n] ^= c;
            out[n] = c^ctx->EKi.c[n];
            ++n;
        }
    }

    ctx->mres = n;
    return 0;
}

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    const union { long one; char little; } is_endian = {1};
    u64 alen = ctx->len.u[0]<<3;
    u64 clen = ctx->len.u[1]<<3;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
#endif

    if (ctx->mres)
        GCM_MUL(ctx,Xi);

    if (is_endian.little) {
#ifdef BSWAP8
        alen = BSWAP8(alen);
        clen = BSWAP8(clen);
#else
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p)  <<32|GETU32(p+4);
        clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
#endif
    }

    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx,Xi);

    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (tag && len<=sizeof(ctx->Xi))
        return memcmp(ctx->Xi.c,tag,len);
    else
        return -1;
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    CRYPTO_gcm128_finish(ctx, NULL, 0);
    memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
}

GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *ret;

    if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
        CRYPTO_gcm128_init(ret,key,block);

    return ret;
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    if (ctx) {
        OPENSSL_cleanse(ctx,sizeof(*ctx));
        OPENSSL_free(ctx);
    }
}
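/*
 * Typical calling sequence for the API above (an illustrative sketch
 * only, not compiled in; it assumes an AES_KEY prepared with
 * AES_set_encrypt_key, and caller-supplied iv/aad/plaintext/
 * ciphertext/tag buffers, which are all hypothetical names):
 */
#if 0
    GCM128_CONTEXT *gcm = CRYPTO_gcm128_new(&aes_key,(block128_f)AES_encrypt);

    CRYPTO_gcm128_setiv(gcm,iv,12);             /* fresh IV per message    */
    CRYPTO_gcm128_aad(gcm,aad,aad_len);         /* all AAD before the data */
    CRYPTO_gcm128_encrypt(gcm,plaintext,ciphertext,msg_len);
    CRYPTO_gcm128_tag(gcm,tag,16);              /* emit the 16-byte tag    */
    /* ...and on receipt, after setiv/aad/decrypt, verify with
     *     CRYPTO_gcm128_finish(gcm,tag,16) == 0
     */
    CRYPTO_gcm128_release(gcm);
#endif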
#if defined(SELFTEST)
#include <stdio.h>
#include <openssl/aes.h>

/* Test Case 1 */
static const u8 K1[16],
    *P1=NULL,
    *A1=NULL,
    IV1[12],
    *C1=NULL,
    T1[]=  {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};

/* Test Case 2 */
#define K2 K1
#define A2 A1
#define IV2 IV1
static const u8 P2[16],
    C2[]=  {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
    T2[]=  {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};

/* Test Case 3 */
#define A3 A2
static const u8 K3[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
    P3[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
            0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
            0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
            0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
    IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
    C3[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
            0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
            0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
            0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
    T3[]=  {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};

/* Test Case 4 */
#define K4 K3
#define IV4 IV3
static const u8 P4[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
            0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
            0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
            0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
    A4[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
            0xab,0xad,0xda,0xd2},
    C4[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
            0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
            0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
            0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
    T4[]=  {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};

/* Test Case 5 */
#define K5 K4
#define P5 P4
#define A5 A4
static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
    C5[]=  {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
            0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
            0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
            0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
    T5[]=  {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};

/* Test Case 6 */
#define K6 K5
#define P6 P5
#define A6 A5
static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
            0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
            0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
            0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
    C6[]=  {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
            0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
            0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
            0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
    T6[]=  {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};

/* Test Case 7 */
static const u8 K7[24],
    *P7=NULL,
    *A7=NULL,
    IV7[12],
    *C7=NULL,
    T7[]=  {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};

/* Test Case 8 */
#define K8 K7
#define IV8 IV7
#define A8 A7
static const u8 P8[16],
    C8[]=  {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
    T8[]=  {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};

/* Test Case 9 */
#define A9 A8
static const u8 K9[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
            0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
    P9[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
            0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
            0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
            0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
    IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
    C9[]=  {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
            0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
            0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
            0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
    T9[]=  {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};

/* Test Case 10 */
#define K10 K9
#define IV10 IV9
static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
            0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
            0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
            0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
    A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
            0xab,0xad,0xda,0xd2},
    C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
            0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
            0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
            0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
    T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};

/* Test Case 11 */
#define K11 K10
#define P11 P10
#define A11 A10
static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
    C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
            0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
            0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
            0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
    T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};

/* Test Case 12 */
#define K12 K11
#define P12 P11
#define A12 A11
static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
            0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
            0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
            0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
    C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
            0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
            0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
            0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
    T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};

/* Test Case 13 */
static const u8 K13[32],
    *P13=NULL,
    *A13=NULL,
    IV13[12],
    *C13=NULL,
    T13[]= {0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};

/* Test Case 14 */
#define K14 K13
#define A14 A13
static const u8 P14[16],
    IV14[12],
    C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
    T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};

/* Test Case 15 */
#define A15 A14
static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
            0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
    P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
            0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
            0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
            0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
    IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
    C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
            0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
            0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
            0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
    T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};

/* Test Case 16 */
#define K16 K15
#define IV16 IV15
static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
            0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
            0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
            0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
    A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
            0xab,0xad,0xda,0xd2},
    C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
            0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
            0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
            0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
    T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};

/* Test Case 17 */
#define K17 K16
#define P17 P16
#define A17 A16
static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
    C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
            0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
            0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
            0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
    T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};

/* Test Case 18 */
#define K18 K17
#define P18 P17
#define A18 A17
static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
            0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
            0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
            0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
    C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
            0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
            0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
            0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
    T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};

#define TEST_CASE(n)    do {                                            \
        u8 out[sizeof(P##n)];                                           \
        AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);                  \
        CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);          \
        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));                  \
        memset(out,0,sizeof(out));                                      \
        if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));            \
        if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));     \
        if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||                       \
            (C##n && memcmp(out,C##n,sizeof(out))))                     \
                ret++, printf ("encrypt test#%d failed.\n",n);          \
        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));                  \
        memset(out,0,sizeof(out));                                      \
        if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));            \
        if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));     \
        if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||                       \
            (P##n && memcmp(out,P##n,sizeof(out))))                     \
                ret++, printf ("decrypt test#%d failed.\n",n);          \
        } while(0)

int main()
{
    GCM128_CONTEXT ctx;
    AES_KEY key;
    int ret=0;

    TEST_CASE(1);
    TEST_CASE(2);
    TEST_CASE(3);
    TEST_CASE(4);
    TEST_CASE(5);
    TEST_CASE(6);
    TEST_CASE(7);
    TEST_CASE(8);
    TEST_CASE(9);
    TEST_CASE(10);
    TEST_CASE(11);
    TEST_CASE(12);
    TEST_CASE(13);
    TEST_CASE(14);
    TEST_CASE(15);
    TEST_CASE(16);
    TEST_CASE(17);
    TEST_CASE(18);

#ifdef OPENSSL_CPUID_OBJ
    {
    size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
    union { u64 u; u8 c[1024]; } buf;
    int i;

    AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
    CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
    CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));

    CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
    start = OPENSSL_rdtsc();
    CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
    gcm_t = OPENSSL_rdtsc() - start;

    CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
                          &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
                          (block128_f)AES_encrypt);
    start = OPENSSL_rdtsc();
    CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
                          &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
                          (block128_f)AES_encrypt);
    ctr_t = OPENSSL_rdtsc() - start;

    printf("%.2f-%.2f=%.2f\n",
           gcm_t/(double)sizeof(buf),
           ctr_t/(double)sizeof(buf),
           (gcm_t-ctr_t)/(double)sizeof(buf));
#ifdef GHASH
    GHASH(&ctx,buf.c,sizeof(buf));
    start = OPENSSL_rdtsc();
    for (i=0;i<100;++i) GHASH(&ctx,buf.c,sizeof(buf));
    gcm_t = OPENSSL_rdtsc() - start;
    printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
#endif
    }
#endif

    return ret;
}
#endif