/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core (at) openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/base.h>

#include <assert.h>
#include <string.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"
#include "../../internal.h"

#if !defined(OPENSSL_NO_ASM) &&                          \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) ||  \
     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
     defined(OPENSSL_PPC64LE))
#define GHASH_ASM
#endif

#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
#define REDUCE1BIT(V)                                                 \
  do {                                                                \
    if (sizeof(size_t) == 8) {                                        \
      uint64_t T = UINT64_C(0xe100000000000000) & (0 - ((V).lo & 1)); \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ T;                                     \
    } else {                                                          \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)((V).lo & 1));        \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ ((uint64_t)T << 32);                   \
    }                                                                 \
  } while (0)

// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
// bits of a |size_t|.
static const size_t kSizeTWithoutLower4Bits = (size_t) -16;

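/* gcm_init_4bit expands the hash key |H| into a 16-entry multiplication table
 * for the generic 4-bit GHASH implementation. Htable[8] holds H itself;
 * Htable[4], Htable[2] and Htable[1] are produced by successive REDUCE1BIT
 * steps (a shift-and-reduce in GHASH's reflected GF(2^128) representation);
 * the remaining entries are XOR combinations of those, so each 4-bit chunk of
 * the hash input can be handled with a single table lookup. */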
static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  for (int j = 0; j < 16; ++j) {
    V = Htable[j];
    Htable[j].hi = V.lo;
    Htable[j].lo = V.hi;
  }
#endif
}

#if !defined(GHASH_ASM) || defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};

static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  Xi[0] = CRYPTO_bswap8(Z.hi);
  Xi[1] = CRYPTO_bswap8(Z.lo);
}

/* Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for details.
 * Compiler-generated code doesn't seem to give any performance improvement,
 * at least not on x86[_64]. It's here mostly as a reference and a placeholder
 * for possible future non-trivial optimizations. */
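/* gcm_ghash_4bit folds |len| bytes from |inp| (a whole number of 16-byte
 * blocks) into |Xi|: each block is XORed into the running value and the
 * result is multiplied by H using the same nibble-at-a-time table walk as
 * gcm_gmult_4bit. */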
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    Xi[0] = CRYPTO_bswap8(Z.hi);
    Xi[1] = CRYPTO_bswap8(Z.lo);
  } while (inp += 16, len -= 16);
}
#else /* GHASH_ASM */
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->Htable)
#if defined(GHASH_ASM)
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
/* GHASH_CHUNK is a "stride parameter" intended to mitigate cache thrashing.
 * In other words, the idea is to hash data while it is still in the L1 cache
 * after the encryption pass. */
#define GHASH_CHUNK (3 * 1024)
#endif

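/* Platform-specific GHASH implementations follow. CRYPTO_ghash_init selects
 * among them at runtime based on CPU capability checks. When
 * GCM_FUNCREF_4BIT is defined, GCM_MUL and GHASH are redefined below to go
 * through the |gcm_gmult_p|/|gcm_ghash_p| function pointers taken from the
 * context, rather than calling the generic 4-bit code directly. */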
#if defined(GHASH_ASM)

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GCM_FUNCREF_4BIT
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                     size_t len);

#if defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_64
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
                   size_t len);
#define AESNI_GCM
size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                         const void *key, uint8_t ivec[16], uint64_t *Xi);
size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
                         const void *key, uint8_t ivec[16], uint64_t *Xi);
#endif

#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);
#endif

#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#include <openssl/arm_arch.h>
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT

static int pmull_capable(void) {
  return CRYPTO_is_ARMv8_PMULL_capable();
}

void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);

#if defined(OPENSSL_ARM)
/* 32-bit ARM also has support for doing GCM with NEON instructions. */
static int neon_capable(void) {
  return CRYPTO_is_NEON_capable();
}

void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#else
/* AArch64 only has the ARMv8 versions of functions. */
static int neon_capable(void) {
  return 0;
}
static void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) {
  abort();
}
static void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) {
  abort();
}
static void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  abort();
}
#endif

#endif
#elif defined(OPENSSL_PPC64LE)
#define GHASH_ASM_PPC64LE
#define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);
#endif
#endif

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->Htable, in, len)
#endif
#endif

void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
                       u128 *out_key, u128 out_table[16], int *out_is_avx,
                       const uint8_t *gcm_key) {
  *out_is_avx = 0;

  union {
    uint64_t u[2];
    uint8_t c[16];
  } H;

  OPENSSL_memcpy(H.c, gcm_key, 16);

  /* H is stored in host byte order */
  H.u[0] = CRYPTO_bswap8(H.u[0]);
  H.u[1] = CRYPTO_bswap8(H.u[1]);

  OPENSSL_memcpy(out_key, H.c, 16);

#if defined(GHASH_ASM_X86_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
      gcm_init_avx(out_table, H.u);
      *out_mult = gcm_gmult_avx;
      *out_hash = gcm_ghash_avx;
      *out_is_avx = 1;
      return;
    }
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
#elif defined(GHASH_ASM_X86)
  if (crypto_gcm_clmul_enabled()) {
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
#elif defined(GHASH_ASM_ARM)
  if (pmull_capable()) {
    gcm_init_v8(out_table, H.u);
    *out_mult = gcm_gmult_v8;
    *out_hash = gcm_ghash_v8;
    return;
  }

  if (neon_capable()) {
    gcm_init_neon(out_table, H.u);
    *out_mult = gcm_gmult_neon;
    *out_hash = gcm_ghash_neon;
    return;
  }
#elif defined(GHASH_ASM_PPC64LE)
  if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
    gcm_init_p8(out_table, H.u);
    *out_mult = gcm_gmult_p8;
    *out_hash = gcm_ghash_p8;
    return;
  }
#endif

  gcm_init_4bit(out_table, H.u);
#if defined(GHASH_ASM_X86)
  *out_mult = gcm_gmult_4bit_mmx;
  *out_hash = gcm_ghash_4bit_mmx;
#else
  *out_mult = gcm_gmult_4bit;
  *out_hash = gcm_ghash_4bit;
#endif
}

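/* CRYPTO_gcm128_init derives the hash key by encrypting the all-zero block
 * with |aes_key| and then lets CRYPTO_ghash_init pick the GHASH
 * implementation. |is_aesni_encrypt| records whether |block| is the AES-NI
 * block function; together with the AVX GHASH selection it gates the fused
 * |aesni_gcm_*| assembly paths used below. */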
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *aes_key,
                        block128_f block, int is_aesni_encrypt) {
  OPENSSL_memset(ctx, 0, sizeof(*ctx));
  ctx->block = block;

  uint8_t gcm_key[16];
  OPENSSL_memset(gcm_key, 0, sizeof(gcm_key));
  (*block)(gcm_key, gcm_key, aes_key);

  int is_avx;
  CRYPTO_ghash_init(&ctx->gmult, &ctx->ghash, &ctx->H, ctx->Htable, &is_avx,
                    gcm_key);

  ctx->use_aesni_gcm_crypt = (is_avx && is_aesni_encrypt) ? 1 : 0;
}

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const void *key,
                         const uint8_t *iv, size_t len) {
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0; /* AAD length */
  ctx->len.u[1] = 0; /* message length */
  ctx->ares = 0;
  ctx->mres = 0;

  if (len == 12) {
    OPENSSL_memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    uint64_t len0 = len;

    while (len >= 16) {
      for (size_t i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (size_t i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    ctx->Yi.u[1] ^= CRYPTO_bswap8(len0);

    GCM_MUL(ctx, Yi);
    ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  }

  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, key);
  ++ctr;
  ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
}

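/* CRYPTO_gcm128_aad absorbs additional authenticated data into |Xi|. It must
 * be called before any message data is passed to the encrypt/decrypt
 * functions (it fails once |ctx->len.u[1]| is non-zero); |ctx->ares| tracks
 * how many bytes of a partial AAD block are currently buffered in |Xi|. */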
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  unsigned int n;
  uint64_t alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

  alen += len;
  if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

  /* Process a whole number of blocks. */
#ifdef GHASH
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, aad, len_blocks);
    aad += len_blocks;
    len -= len_blocks;
  }
#else
  while (len >= 16) {
    for (size_t i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
    GCM_MUL(ctx, Xi);
    aad += 16;
    len -= 16;
  }
#endif

  /* Process the remainder. */
  if (len != 0) {
    n = (unsigned int)len;
    for (size_t i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (size_t i = 0; i < len; ++i) {
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      }
      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - len_blocks, len_blocks);
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
      ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

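/* CRYPTO_gcm128_decrypt mirrors CRYPTO_gcm128_encrypt: it XORs the CTR
 * keystream into the input, but hashes the ciphertext (i.e. the input rather
 * than the output) into |Xi|. */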
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (size_t i = 0; i < len; ++i) {
      uint8_t c;
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      }
      c = in[i];
      out[i] = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, in, len_blocks);
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
      size_t c = in_t[i];
      out_t[i] = c ^ ctx->EKi.t[i];
      ctx->Xi.t[i] ^= c;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

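/* CRYPTO_gcm128_encrypt_ctr32 is the |ctr128_f|-based variant of
 * CRYPTO_gcm128_encrypt: bulk counter-mode work is delegated to |stream|
 * (and, when |use_aesni_gcm_crypt| is set, to the fused |aesni_gcm_encrypt|
 * assembly), with GHASH applied to the resulting ciphertext in large chunks
 * where the GHASH macro is available. */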
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (ctx->use_aesni_gcm_crypt) {
    /* |aesni_gcm_encrypt| may not process all the input given to it. It may
     * not process *any* of its input if it is deemed too small. */
    size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    in += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx, out, i);
    out += i;
#else
    while (j--) {
      for (i = 0; i < 16; ++i) {
        ctx->Xi.c[i] ^= out[i];
      }
      GCM_MUL(ctx, Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

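/* CRYPTO_gcm128_decrypt_ctr32 is the decryption counterpart: the ciphertext
 * is hashed into |Xi| before being decrypted with |stream|, so the GHASH
 * input is again the ciphertext. */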
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (ctx->use_aesni_gcm_crypt) {
    /* |aesni_gcm_decrypt| may not process all the input given to it. It may
     * not process *any* of its input if it is deemed too small. */
    size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

#if defined(GHASH)
    GHASH(ctx, in, i);
#else
    while (j--) {
      size_t k;
      for (k = 0; k < 16; ++k) {
        ctx->Xi.c[k] ^= in[k];
      }
      GCM_MUL(ctx, Xi);
      in += 16;
    }
    j = i / 16;
    in -= i;
#endif
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += i;
    in += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  uint64_t alen = ctx->len.u[0] << 3;
  uint64_t clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  alen = CRYPTO_bswap8(alen);
  clen = CRYPTO_bswap8(clen);

  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  OPENSSL_memcpy(tag, ctx->Xi.c,
                 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  const uint32_t *ia32cap = OPENSSL_ia32cap_get();
  return (ia32cap[0] & (1 << 24)) && /* check FXSR bit */
         (ia32cap[1] & (1 << 1));    /* check PCLMULQDQ bit */
#else
  return 0;
#endif
}
#endif