/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/modes.h>

#include <assert.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"
#include "../internal.h"


#if !defined(OPENSSL_NO_ASM) && \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM))
#define GHASH_ASM
#endif

#if defined(BSWAP4) && STRICT_ALIGNMENT == 1
/* redefine, because alignment is ensured */
#undef GETU32
#define GETU32(p) BSWAP4(*(const uint32_t *)(p))
#undef PUTU32
#define PUTU32(p, v) *(uint32_t *)(p) = BSWAP4(v)
#endif

#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
#define REDUCE1BIT(V)                                                  \
  do {                                                                 \
    if (sizeof(size_t) == 8) {                                         \
      uint64_t T = OPENSSL_U64(0xe100000000000000) & (0 - (V.lo & 1)); \
      V.lo = (V.hi << 63) | (V.lo >> 1);                               \
      V.hi = (V.hi >> 1) ^ T;                                          \
    } else {                                                           \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)(V.lo & 1));           \
      V.lo = (V.hi << 63) | (V.lo >> 1);                               \
      V.hi = (V.hi >> 1) ^ ((uint64_t)T << 32);                        \
    }                                                                  \
  } while (0)

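/* Roughly speaking, GHASH treats the hash key H and the running state as
 * elements of GF(2^128) stored in a bit-reflected order. In that order,
 * multiplying a value by x is a one-bit right shift of the 128-bit quantity;
 * if the bit shifted out was set, the result is reduced by XOR-ing 0xe1 into
 * the leading byte, which encodes the reduction polynomial
 * x^128 + x^7 + x^2 + x + 1. REDUCE1BIT above performs one such step, and
 * gcm_init_4bit below uses it to compute H*x, H*x^2 and H*x^3, filling the
 * rest of Htable with XOR combinations so that a full multiplication can be
 * done four bits at a time via table lookups. */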
static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  /* ARM assembler expects specific dword order in Htable. */
  {
    int j;
    const union {
      long one;
      char little;
    } is_endian = {1};

    if (is_endian.little) {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo;
        Htable[j].lo = V.hi;
      }
    } else {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo << 32 | V.lo >> 32;
        Htable[j].lo = V.hi << 32 | V.hi >> 32;
      }
    }
  }
#endif
}

#if !defined(GHASH_ASM)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};

static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  if (is_endian.little) {
#ifdef BSWAP8
    Xi[0] = BSWAP8(Z.hi);
    Xi[1] = BSWAP8(Z.lo);
#else
    uint8_t *p = (uint8_t *)Xi;
    uint32_t v;
    v = (uint32_t)(Z.hi >> 32);
    PUTU32(p, v);
    v = (uint32_t)(Z.hi);
    PUTU32(p + 4, v);
    v = (uint32_t)(Z.lo >> 32);
    PUTU32(p + 8, v);
    v = (uint32_t)(Z.lo);
    PUTU32(p + 12, v);
#endif
  } else {
    Xi[0] = Z.hi;
    Xi[1] = Z.lo;
  }
}

/* Streamed version of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
 * details. Compiler-generated code doesn't seem to give any performance
 * improvement, at least not on x86[_64]. It's here mostly as a reference and
 * a placeholder for possible future non-trivial optimizations. */
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    if (is_endian.little) {
#ifdef BSWAP8
      Xi[0] = BSWAP8(Z.hi);
      Xi[1] = BSWAP8(Z.lo);
#else
      uint8_t *p = (uint8_t *)Xi;
      uint32_t v;
      v = (uint32_t)(Z.hi >> 32);
      PUTU32(p, v);
      v = (uint32_t)(Z.hi);
      PUTU32(p + 4, v);
      v = (uint32_t)(Z.lo >> 32);
      PUTU32(p + 8, v);
      v = (uint32_t)(Z.lo);
      PUTU32(p + 12, v);
#endif
    } else {
      Xi[0] = Z.hi;
      Xi[1] = Z.lo;
    }
  } while (inp += 16, len -= 16);
}
#else /* GHASH_ASM */
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif
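/* For reference, the multiplication that the 4-bit table code above
 * accelerates can be written directly from the GHASH definition in NIST
 * SP 800-38D: bytes are read most-significant-bit first and the field is
 * reduced by x^128 + x^7 + x^2 + x + 1 (the 0xe1 constant). The bit-by-bit
 * sketch below is illustrative only; the helper name gf128_mul_ref is
 * hypothetical and the block is excluded from the build. */
#if 0
static void gf128_mul_ref(uint8_t out[16], const uint8_t X[16],
                          const uint8_t Y[16]) {
  uint8_t Z[16] = {0}; /* accumulator, starts at zero */
  uint8_t V[16];       /* running multiple of Y */
  int i, j;

  for (j = 0; j < 16; ++j) {
    V[j] = Y[j];
  }
  for (i = 0; i < 128; ++i) {
    /* If bit i of X is set (bit 0 is the MSB of X[0]), add V into Z. */
    if ((X[i >> 3] >> (7 - (i & 7))) & 1) {
      for (j = 0; j < 16; ++j) {
        Z[j] ^= V[j];
      }
    }
    /* V <- V * x: shift right one bit and reduce if a bit fell off. */
    {
      int carry = V[15] & 1;
      for (j = 15; j > 0; --j) {
        V[j] = (uint8_t)((V[j] >> 1) | (V[j - 1] << 7));
      }
      V[0] >>= 1;
      if (carry) {
        V[0] ^= 0xe1;
      }
    }
  }
  for (j = 0; j < 16; ++j) {
    out[j] = Z[j];
  }
}
#endif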

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit(ctx->Xi.u, ctx->Htable)
#if defined(GHASH_ASM)
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
/* GHASH_CHUNK is a "stride parameter" that mitigates cache thrashing: the
 * idea is to hash data while it is still in the L1 cache after the
 * encryption pass. */
#define GHASH_CHUNK (3 * 1024)
#endif


#if defined(GHASH_ASM)
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_OR_64
#define GCM_FUNCREF_4BIT
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                     size_t len);

#if defined(OPENSSL_X86)
#define gcm_init_avx gcm_init_clmul
#define gcm_gmult_avx gcm_gmult_clmul
#define gcm_ghash_avx gcm_ghash_clmul
#else
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                   size_t len);
#endif

#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);

void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);
#endif
#elif defined(OPENSSL_ARM)
#include "../arm_arch.h"
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT
void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif
#endif
#endif

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)(ctx->Xi.u, ctx->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)(ctx->Xi.u, ctx->Htable, in, len)
#endif
#endif

GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) {
  GCM128_CONTEXT *ret;

  ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT));
  if (ret != NULL) {
    CRYPTO_gcm128_init(ret, key, block);
  }

  return ret;
}

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) {
  const union {
    long one;
    char little;
  } is_endian = {1};

  memset(ctx, 0, sizeof(*ctx));
  ctx->block = block;
  ctx->key = key;

  (*block)(ctx->H.c, ctx->H.c, key);

  if (is_endian.little) {
    /* H is stored in host byte order */
#ifdef BSWAP8
    ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
    ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
    uint8_t *p = ctx->H.c;
    uint64_t hi, lo;
    hi = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    lo = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
    ctx->H.u[0] = hi;
    ctx->H.u[1] = lo;
#endif
  }

#if defined(GHASH_ASM_X86_OR_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
      gcm_init_avx(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_avx;
      ctx->ghash = gcm_ghash_avx;
    } else {
      gcm_init_clmul(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_clmul;
      ctx->ghash = gcm_ghash_clmul;
    }
    return;
  }
  gcm_init_4bit(ctx->Htable, ctx->H.u);
#if defined(GHASH_ASM_X86) /* x86 only */
  if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
    ctx->gmult = gcm_gmult_4bit_mmx;
    ctx->ghash = gcm_ghash_4bit_mmx;
  } else {
    ctx->gmult = gcm_gmult_4bit_x86;
    ctx->ghash = gcm_ghash_4bit_x86;
  }
#else
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
#endif
#elif defined(GHASH_ASM_ARM)
  if (CRYPTO_is_NEON_capable()) {
    gcm_init_neon(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_neon;
    ctx->ghash = gcm_ghash_neon;
  } else {
    gcm_init_4bit(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
  }
#else
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
  gcm_init_4bit(ctx->Htable, ctx->H.u);
#endif
}

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const uint8_t *iv, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0; /* AAD length */
  ctx->len.u[1] = 0; /* message length */
  ctx->ares = 0;
  ctx->mres = 0;

  if (len == 12) {
    memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    size_t i;
    uint64_t len0 = len;

    while (len >= 16) {
      for (i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    if (is_endian.little) {
#ifdef BSWAP8
      ctx->Yi.u[1] ^= BSWAP8(len0);
#else
      ctx->Yi.c[8] ^= (uint8_t)(len0 >> 56);
      ctx->Yi.c[9] ^= (uint8_t)(len0 >> 48);
      ctx->Yi.c[10] ^= (uint8_t)(len0 >> 40);
      ctx->Yi.c[11] ^= (uint8_t)(len0 >> 32);
      ctx->Yi.c[12] ^= (uint8_t)(len0 >> 24);
      ctx->Yi.c[13] ^= (uint8_t)(len0 >> 16);
      ctx->Yi.c[14] ^= (uint8_t)(len0 >> 8);
      ctx->Yi.c[15] ^= (uint8_t)(len0);
#endif
    } else {
      ctx->Yi.u[1] ^= len0;
    }

    GCM_MUL(ctx, Yi);

    if (is_endian.little) {
      ctr = GETU32(ctx->Yi.c + 12);
    } else {
      ctr = ctx->Yi.d[3];
    }
  }

  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key);
  ++ctr;
  if (is_endian.little) {
    PUTU32(ctx->Yi.c + 12, ctr);
  } else {
    ctx->Yi.d[3] = ctr;
  }
}

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  size_t i;
  unsigned int n;
  uint64_t alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

  alen += len;
  if (alen > (OPENSSL_U64(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

#ifdef GHASH
  if ((i = (len & (size_t) -16))) {
    GHASH(ctx, aad, i);
    aad += i;
    len -= i;
  }
#else
  while (len >= 16) {
    for (i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
    GCM_MUL(ctx, Xi);
    aad += 16;
    len -= 16;
  }
#endif
  if (len) {
    n = (unsigned int)len;
    for (i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

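/* Note on call order and limits, as enforced in CRYPTO_gcm128_aad above and
 * in the encrypt/decrypt paths below: all AAD must be supplied before the
 * first encrypt or decrypt call (once ctx->len.u[1] is non-zero, further AAD
 * is rejected), and the bounds checks mirror the NIST SP 800-38D limits of
 * at most 2^61 bytes of AAD and 2^36 - 32 bytes of plaintext per nonce. */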
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
                          unsigned char *out, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  if ((i = (len & (size_t) -16))) {
    size_t j = i;

    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - j, j);
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

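/* In both directions GHASH is computed over the ciphertext: the encrypt path
 * above folds each output block into Xi after the counter-mode XOR (and, in
 * the chunked path, hashes "out" once a chunk has been produced), while the
 * decrypt path below hashes the incoming ciphertext before it is XORed with
 * the key stream. This is why the two functions are not simply mirror images
 * of each other. */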
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
                          unsigned char *out, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      uint8_t c;
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      c = in[i];
      out[i] = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  if ((i = (len & (size_t) -16))) {
    GHASH(ctx, in, i);
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      size_t c = in_t[i];
      out_t[i] = c ^ ctx->EKi.t[i];
      ctx->Xi.t[i] ^= c;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const uint8_t *in,
                                uint8_t *out, size_t len, ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  if ((i = (len & (size_t) -16))) {
    size_t j = i / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    in += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx, out, i);
    out += i;
#else
    while (j--) {
      for (i = 0; i < 16; ++i) {
        ctx->Xi.c[i] ^= out[i];
      }
      GCM_MUL(ctx, Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const uint8_t *in,
                                uint8_t *out, size_t len, ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  if ((i = (len & (size_t) -16))) {
    size_t j = i / 16;

#if defined(GHASH)
    GHASH(ctx, in, i);
#else
    while (j--) {
      size_t k;
      for (k = 0; k < 16; ++k) {
        ctx->Xi.c[k] ^= in[k];
      }
      GCM_MUL(ctx, Xi);
      in += 16;
    }
    j = i / 16;
    in -= i;
#endif
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    out += i;
    in += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  uint64_t alen = ctx->len.u[0] << 3;
  uint64_t clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  if (is_endian.little) {
#ifdef BSWAP8
    alen = BSWAP8(alen);
    clen = BSWAP8(clen);
#else
    uint8_t *p = ctx->len.c;

    ctx->len.u[0] = alen;
    ctx->len.u[1] = clen;

    alen = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    clen = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
  }

  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  memcpy(tag, ctx->Xi.c, len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) {
  if (ctx) {
    OPENSSL_cleanse(ctx, sizeof(*ctx));
    OPENSSL_free(ctx);
  }
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  return OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
         OPENSSL_ia32cap_P[1] & (1 << 1);    /* check PCLMULQDQ bit */
#else
  return 0;
#endif
}
#endif
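
/* For illustration only, here is roughly how a caller is expected to drive
 * the API above for a single sealed message. The sketch assumes an AES block
 * cipher from <openssl/aes.h> (AES_set_encrypt_key / AES_encrypt); the helper
 * name gcm128_seal_example is hypothetical and the block is excluded from the
 * build. Real callers may also use the *_ctr32 paths when a fast counter-mode
 * stream is available. */
#if 0
#include <openssl/aes.h>

static int gcm128_seal_example(const uint8_t key[16], const uint8_t nonce[12],
                               const uint8_t *aad, size_t aad_len,
                               const uint8_t *plaintext, size_t len,
                               uint8_t *ciphertext, uint8_t tag[16]) {
  AES_KEY aes;
  GCM128_CONTEXT gcm;

  if (AES_set_encrypt_key(key, 128, &aes) != 0) {
    return 0;
  }
  /* Derive H and bind the block cipher to the context. */
  CRYPTO_gcm128_init(&gcm, &aes, (block128_f)AES_encrypt);
  /* Set the per-message nonce; this resets Xi, Yi and the length counters. */
  CRYPTO_gcm128_setiv(&gcm, nonce, 12);
  /* All AAD must be supplied before any plaintext. */
  if (!CRYPTO_gcm128_aad(&gcm, aad, aad_len) ||
      !CRYPTO_gcm128_encrypt(&gcm, plaintext, ciphertext, len)) {
    return 0;
  }
  /* Emit the full 16-byte tag; a verifier would instead pass the received
   * tag to CRYPTO_gcm128_finish and check its return value. */
  CRYPTO_gcm128_tag(&gcm, tag, 16);
  return 1;
}
#endif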