/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/base.h>

#include <assert.h>
#include <string.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"


#if !defined(OPENSSL_NO_ASM) &&                         \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
#define GHASH_ASM
#endif

#if defined(BSWAP4) && STRICT_ALIGNMENT == 1
/* redefine, because alignment is ensured */
#undef GETU32
#define GETU32(p) BSWAP4(*(const uint32_t *)(p))
#undef PUTU32
#define PUTU32(p, v) *(uint32_t *)(p) = BSWAP4(v)
#endif

#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
#define REDUCE1BIT(V)                                               \
  do {                                                              \
    if (sizeof(size_t) == 8) {                                      \
      uint64_t T = UINT64_C(0xe100000000000000) & (0 - (V.lo & 1)); \
      V.lo = (V.hi << 63) | (V.lo >> 1);                            \
      V.hi = (V.hi >> 1) ^ T;                                       \
    } else {                                                        \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)(V.lo & 1));        \
      V.lo = (V.hi << 63) | (V.lo >> 1);                            \
      V.hi = (V.hi >> 1) ^ ((uint64_t)T << 32);                     \
    }                                                               \
  } while (0)

// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
// bits of a |size_t|.
static const size_t kSizeTWithoutLower4Bits = (size_t) -16;

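/* GHASH is multiplication in GF(2^128), reduced modulo the polynomial
 * x^128 + x^7 + x^2 + x + 1 and using GCM's reflected bit ordering.
 * gcm_init_4bit implements Shoup's 4-bit table method: Htable[i] holds the
 * product of the hash key H with the 4-bit polynomial encoded by i, so a full
 * multiplication can be performed one nibble of Xi at a time using table
 * lookups and shifts. The entries Htable[8], [4], [2] and [1] are H, H*x,
 * H*x^2 and H*x^3, built with REDUCE1BIT (a one-bit right shift plus a
 * conditional XOR of the reduction constant 0xe1 << 120); the remaining
 * entries are XOR combinations of those. */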
static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  /* The ARM assembler expects a specific dword order in Htable. */
  {
    int j;
    const union {
      long one;
      char little;
    } is_endian = {1};

    if (is_endian.little) {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo;
        Htable[j].lo = V.hi;
      }
    } else {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo << 32 | V.lo >> 32;
        Htable[j].lo = V.hi << 32 | V.hi >> 32;
      }
    }
  }
#endif
}

#if !defined(GHASH_ASM) || defined(OPENSSL_AARCH64)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};

static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  if (is_endian.little) {
#ifdef BSWAP8
    Xi[0] = BSWAP8(Z.hi);
    Xi[1] = BSWAP8(Z.lo);
#else
    uint8_t *p = (uint8_t *)Xi;
    uint32_t v;
    v = (uint32_t)(Z.hi >> 32);
    PUTU32(p, v);
    v = (uint32_t)(Z.hi);
    PUTU32(p + 4, v);
    v = (uint32_t)(Z.lo >> 32);
    PUTU32(p + 8, v);
    v = (uint32_t)(Z.lo);
    PUTU32(p + 12, v);
#endif
  } else {
    Xi[0] = Z.hi;
    Xi[1] = Z.lo;
  }
}

/* Streamed version of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
 * details. Compiler-generated code doesn't seem to give any performance
 * improvement, at least not on x86[_64]. It's here mostly as a reference and
 * a placeholder for possible future non-trivial optimizations. */
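/* gcm_ghash_4bit folds |len| bytes (a multiple of 16) into the running hash:
 * for each 16-byte block it computes Xi = (Xi ^ block) * H, using the same
 * per-nibble table walk as gcm_gmult_4bit but XORing the input block in on
 * the fly, which saves a separate pass over Xi. */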
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    if (is_endian.little) {
#ifdef BSWAP8
      Xi[0] = BSWAP8(Z.hi);
      Xi[1] = BSWAP8(Z.lo);
#else
      uint8_t *p = (uint8_t *)Xi;
      uint32_t v;
      v = (uint32_t)(Z.hi >> 32);
      PUTU32(p, v);
      v = (uint32_t)(Z.hi);
      PUTU32(p + 4, v);
      v = (uint32_t)(Z.lo >> 32);
      PUTU32(p + 8, v);
      v = (uint32_t)(Z.lo);
      PUTU32(p + 12, v);
#endif
    } else {
      Xi[0] = Z.hi;
      Xi[1] = Z.lo;
    }
  } while (inp += 16, len -= 16);
}
#else /* GHASH_ASM */
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit(ctx->Xi.u, ctx->Htable)
#if defined(GHASH_ASM)
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
/* GHASH_CHUNK is a "stride parameter" that mitigates cache thrashing: the
 * idea is to hash data while it is still in the L1 cache after the
 * encryption pass. */
#define GHASH_CHUNK (3 * 1024)
#endif

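/* The blocks below declare the assembly implementations of the GHASH
 * primitives for the supported CPUs. Which |gmult|/|ghash| pair (and which
 * table initializer) is actually used is decided at runtime in
 * CRYPTO_gcm128_init based on CPU capability bits: CLMUL/AVX on x86 and
 * x86-64, PMULL or NEON on ARM, with gcm_init_4bit and the C routines above
 * as the generic fallback. */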
#if defined(GHASH_ASM)
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_OR_64
#define GCM_FUNCREF_4BIT
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                     size_t len);

#if defined(OPENSSL_X86)
#define gcm_init_avx gcm_init_clmul
#define gcm_gmult_avx gcm_gmult_clmul
#define gcm_ghash_avx gcm_ghash_clmul
#else
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                   size_t len);
#endif

#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);

void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);
#endif
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#include <openssl/arm_arch.h>
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT

static int pmull_capable(void) {
  return CRYPTO_is_ARMv8_PMULL_capable();
}

void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);

#if defined(OPENSSL_ARM)
/* 32-bit ARM also has support for doing GCM with NEON instructions. */
static int neon_capable(void) {
  return CRYPTO_is_NEON_capable();
}

void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#else
/* AArch64 only has the ARMv8 versions of these functions. */
static int neon_capable(void) {
  return 0;
}
void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) {
  abort();
}
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) {
  abort();
}
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len) {
  abort();
}
#endif

#endif
#endif
#endif

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)(ctx->Xi.u, ctx->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)(ctx->Xi.u, ctx->Htable, in, len)
#endif
#endif

GCM128_CONTEXT *CRYPTO_gcm128_new(const void *key, block128_f block) {
  GCM128_CONTEXT *ret;

  ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT));
  if (ret != NULL) {
    CRYPTO_gcm128_init(ret, key, block);
  }

  return ret;
}

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *key,
                        block128_f block) {
  const union {
    long one;
    char little;
  } is_endian = {1};

  memset(ctx, 0, sizeof(*ctx));
  ctx->block = block;

  (*block)(ctx->H.c, ctx->H.c, key);

  if (is_endian.little) {
    /* H is stored in host byte order */
#ifdef BSWAP8
    ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
    ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
    uint8_t *p = ctx->H.c;
    uint64_t hi, lo;
    hi = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    lo = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
    ctx->H.u[0] = hi;
    ctx->H.u[1] = lo;
#endif
  }

#if defined(GHASH_ASM_X86_OR_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
      gcm_init_avx(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_avx;
      ctx->ghash = gcm_ghash_avx;
    } else {
      gcm_init_clmul(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_clmul;
      ctx->ghash = gcm_ghash_clmul;
    }
    return;
  }
  gcm_init_4bit(ctx->Htable, ctx->H.u);
#if defined(GHASH_ASM_X86) /* x86 only */
  if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
    ctx->gmult = gcm_gmult_4bit_mmx;
    ctx->ghash = gcm_ghash_4bit_mmx;
  } else {
    ctx->gmult = gcm_gmult_4bit_x86;
    ctx->ghash = gcm_ghash_4bit_x86;
  }
#else
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
#endif
#elif defined(GHASH_ASM_ARM)
  if (pmull_capable()) {
    gcm_init_v8(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_v8;
    ctx->ghash = gcm_ghash_v8;
  } else if (neon_capable()) {
    gcm_init_neon(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_neon;
    ctx->ghash = gcm_ghash_neon;
  } else {
    gcm_init_4bit(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
  }
#else
  gcm_init_4bit(ctx->Htable, ctx->H.u);
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
#endif
}

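/* A typical one-shot encryption with this layer looks roughly like the sketch
 * below (error handling omitted; |aes_key| and |block_fn| are placeholders for
 * whatever block cipher implementation the caller provides):
 *
 *   GCM128_CONTEXT gcm;
 *   CRYPTO_gcm128_init(&gcm, &aes_key, block_fn);
 *   CRYPTO_gcm128_setiv(&gcm, &aes_key, iv, iv_len);
 *   CRYPTO_gcm128_aad(&gcm, aad, aad_len);
 *   CRYPTO_gcm128_encrypt(&gcm, &aes_key, plaintext, ciphertext, len);
 *   CRYPTO_gcm128_tag(&gcm, tag, 16);
 *
 * All AAD must be supplied before any en/decryption, and the same |key| must
 * be passed to every call; the context only stores the derived hash key H,
 * not the cipher key itself. */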
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const void *key,
                         const uint8_t *iv, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0; /* AAD length */
  ctx->len.u[1] = 0; /* message length */
  ctx->ares = 0;
  ctx->mres = 0;

  if (len == 12) {
    memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    size_t i;
    uint64_t len0 = len;

    while (len >= 16) {
      for (i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    if (is_endian.little) {
#ifdef BSWAP8
      ctx->Yi.u[1] ^= BSWAP8(len0);
#else
      ctx->Yi.c[8] ^= (uint8_t)(len0 >> 56);
      ctx->Yi.c[9] ^= (uint8_t)(len0 >> 48);
      ctx->Yi.c[10] ^= (uint8_t)(len0 >> 40);
      ctx->Yi.c[11] ^= (uint8_t)(len0 >> 32);
      ctx->Yi.c[12] ^= (uint8_t)(len0 >> 24);
      ctx->Yi.c[13] ^= (uint8_t)(len0 >> 16);
      ctx->Yi.c[14] ^= (uint8_t)(len0 >> 8);
      ctx->Yi.c[15] ^= (uint8_t)(len0);
#endif
    } else {
      ctx->Yi.u[1] ^= len0;
    }

    GCM_MUL(ctx, Yi);

    if (is_endian.little) {
      ctr = GETU32(ctx->Yi.c + 12);
    } else {
      ctr = ctx->Yi.d[3];
    }
  }

  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, key);
  ++ctr;
  if (is_endian.little) {
    PUTU32(ctx->Yi.c + 12, ctr);
  } else {
    ctx->Yi.d[3] = ctr;
  }
}

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  size_t i;
  unsigned int n;
  uint64_t alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

  alen += len;
  if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

#ifdef GHASH
  if ((i = (len & (size_t) -16))) {
    GHASH(ctx, aad, i);
    aad += i;
    len -= i;
  }
#else
  while (len >= 16) {
    for (i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
    GCM_MUL(ctx, Xi);
    aad += 16;
    len -= 16;
  }
#endif
  if (len) {
    n = (unsigned int)len;
    for (i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

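/* CRYPTO_gcm128_encrypt is CTR-mode encryption fused with GHASH: Yi holds the
 * current counter block, EKi its encryption under |key|, and every ciphertext
 * block is folded into Xi. The first call flushes any buffered AAD
 * (ctx->ares), and the partial-block state in ctx->mres lets callers split a
 * message across calls at arbitrary byte boundaries. */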
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }

    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  if ((i = (len & (size_t) -16))) {
    size_t j = i;

    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - j, j);
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

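/* Decryption mirrors CRYPTO_gcm128_encrypt with one difference: GHASH is
 * computed over the ciphertext, which here is the *input*, so each block is
 * folded into Xi from |in| before (or as) it is XORed with the keystream. */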
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      uint8_t c;
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      c = in[i];
      out[i] = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  if ((i = (len & (size_t) -16))) {
    GHASH(ctx, in, i);
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      size_t c = in_t[i];
      out_t[i] = c ^ ctx->EKi.t[i];
      ctx->Xi.t[i] ^= c;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

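/* The *_ctr32 variants take a ctr128_f |stream| that encrypts many counter
 * blocks per call (e.g. a hardware-accelerated AES-CTR routine); only the low
 * 32 bits of the counter are incremented, matching GCM's counter layout. When
 * the assembly GHASH is available, bulk data is processed in GHASH_CHUNK-sized
 * pieces, with leftovers falling back to the single-block path. */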
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    in += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx, out, i);
    out += i;
#else
    while (j--) {
      for (i = 0; i < 16; ++i) {
        ctx->Xi.c[i] ^= out[i];
      }
      GCM_MUL(ctx, Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

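/* In the decrypting variant the ciphertext is the input, so GHASH is applied
 * to |in| *before* |stream| runs; this keeps in-place operation (in == out)
 * correct, since the stream call would otherwise overwrite bytes that still
 * need to be hashed. */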
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

#if defined(GHASH)
    GHASH(ctx, in, i);
#else
    while (j--) {
      size_t k;
      for (k = 0; k < 16; ++k) {
        ctx->Xi.c[k] ^= in[k];
      }
      GCM_MUL(ctx, Xi);
      in += 16;
    }
    j = i / 16;
    in -= i;
#endif
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    out += i;
    in += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  uint64_t alen = ctx->len.u[0] << 3;
  uint64_t clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  if (is_endian.little) {
#ifdef BSWAP8
    alen = BSWAP8(alen);
    clen = BSWAP8(clen);
#else
    uint8_t *p = ctx->len.c;

    ctx->len.u[0] = alen;
    ctx->len.u[1] = clen;

    alen = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    clen = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
  }

  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  memcpy(tag, ctx->Xi.c, len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) {
  if (ctx) {
    OPENSSL_cleanse(ctx, sizeof(*ctx));
    OPENSSL_free(ctx);
  }
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  return OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
         OPENSSL_ia32cap_P[1] & (1 << 1);    /* check PCLMULQDQ bit */
#else
  return 0;
#endif
}
#endif