/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/modes.h>

#include <assert.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"
#include "../internal.h"


#if !defined(OPENSSL_NO_ASM) && \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM))
#define GHASH_ASM
#endif

#if defined(BSWAP4) && STRICT_ALIGNMENT == 1
/* Redefine: the 16-byte buffers these macros touch live in GCM128_CONTEXT
 * unions with uint64_t members, so their alignment is ensured and a direct
 * (byte-swapped) word access is safe. */
#undef GETU32
#define GETU32(p) BSWAP4(*(const uint32_t *)(p))
#undef PUTU32
#define PUTU32(p, v) *(uint32_t *)(p) = BSWAP4(v)
#endif

#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
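/* REDUCE1BIT multiplies V by x in GF(2^128). In GCM's reflected bit order a
 * multiplication by x is a one-bit right shift of the 128-bit value; if the
 * bit shifted out (the coefficient of x^127) was set, the reduction
 * polynomial x^128 + x^7 + x^2 + x + 1 (the constant 0xe1 in the top byte)
 * is folded back into the high word. The 0 - (V.lo & 1) mask keeps the fold
 * branch-free. */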
#define REDUCE1BIT(V)                                                  \
  do {                                                                 \
    if (sizeof(size_t) == 8) {                                         \
      uint64_t T = OPENSSL_U64(0xe100000000000000) & (0 - (V.lo & 1)); \
      V.lo = (V.hi << 63) | (V.lo >> 1);                               \
      V.hi = (V.hi >> 1) ^ T;                                          \
    } else {                                                           \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)(V.lo & 1));           \
      V.lo = (V.hi << 63) | (V.lo >> 1);                               \
      V.hi = (V.hi >> 1) ^ ((uint64_t)T << 32);                        \
    }                                                                  \
  } while (0)


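/* gcm_init_4bit builds the 4-bit (Shoup) lookup table for multiplication by
 * H: each Htable[i] is the product of H and the 4-bit polynomial encoded by
 * i, so GHASH can consume one nibble of input per table lookup. The doubling
 * chain fills the power-of-two slots (in GCM's reflected bit order H itself
 * lands in Htable[8], H*x in Htable[4], down to H*x^3 in Htable[1]); every
 * remaining slot is an XOR of those four. */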
static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  /* The ARM assembly expects a specific dword order in Htable. */
  {
    int j;
    const union {
      long one;
      char little;
    } is_endian = {1};

    if (is_endian.little) {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo;
        Htable[j].lo = V.hi;
      }
    } else {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo << 32 | V.lo >> 32;
        Htable[j].lo = V.hi << 32 | V.hi >> 32;
      }
    }
  }
#endif
}

#if !defined(GHASH_ASM)
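/* rem_4bit holds the sixteen possible reductions of the four bits that a
 * 4-bit GHASH step shifts out of the low end of Z: entry r is the constant
 * XORed into the top of Z when the dropped nibble is r. PACK places each
 * 16-bit constant in the most significant bits of a size_t so the same
 * table serves 32- and 64-bit builds. */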
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};

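/* gcm_gmult_4bit computes Xi = Xi * H. It walks the sixteen bytes of Xi from
 * last to first, looking up the low and then the high nibble of each byte in
 * Htable and folding the four bits shifted out of Z back in via rem_4bit. */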
static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  if (is_endian.little) {
#ifdef BSWAP8
    Xi[0] = BSWAP8(Z.hi);
    Xi[1] = BSWAP8(Z.lo);
#else
    uint8_t *p = (uint8_t *)Xi;
    uint32_t v;
    v = (uint32_t)(Z.hi >> 32);
    PUTU32(p, v);
    v = (uint32_t)(Z.hi);
    PUTU32(p + 4, v);
    v = (uint32_t)(Z.lo >> 32);
    PUTU32(p + 8, v);
    v = (uint32_t)(Z.lo);
    PUTU32(p + 12, v);
#endif
  } else {
    Xi[0] = Z.hi;
    Xi[1] = Z.lo;
  }
}

/* Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for the
 * call sites. Compiler-generated code doesn't seem to give any performance
 * improvement over calling gcm_gmult_4bit once per block, at least not on
 * x86[_64], so this is here mostly as a reference and as a placeholder for
 * possible future non-trivial optimizations. */
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    if (is_endian.little) {
#ifdef BSWAP8
      Xi[0] = BSWAP8(Z.hi);
      Xi[1] = BSWAP8(Z.lo);
#else
      uint8_t *p = (uint8_t *)Xi;
      uint32_t v;
      v = (uint32_t)(Z.hi >> 32);
      PUTU32(p, v);
      v = (uint32_t)(Z.hi);
      PUTU32(p + 4, v);
      v = (uint32_t)(Z.lo >> 32);
      PUTU32(p + 8, v);
      v = (uint32_t)(Z.lo);
      PUTU32(p + 12, v);
#endif
    } else {
      Xi[0] = Z.hi;
      Xi[1] = Z.lo;
    }
  } while (inp += 16, len -= 16);
}
#else /* GHASH_ASM */
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit(ctx->Xi.u, ctx->Htable)
#if defined(GHASH_ASM)
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
/* GHASH_CHUNK is a "stride parameter" meant to mitigate cache-thrashing
 * effects: the idea is to hash data while it is still in the L1 cache after
 * the encryption pass. */
#define GHASH_CHUNK (3 * 1024)
#endif


#if defined(GHASH_ASM)
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_OR_64
#define GCM_FUNCREF_4BIT
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                     size_t len);

#if defined(OPENSSL_X86)
#define gcm_init_avx gcm_init_clmul
#define gcm_gmult_avx gcm_gmult_clmul
#define gcm_ghash_avx gcm_ghash_clmul
#else
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                   size_t len);
#endif

#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);

void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);
#endif
#elif defined(OPENSSL_ARM)
#include "../arm_arch.h"
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT
void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif
#endif
#endif

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)(ctx->Xi.u, ctx->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)(ctx->Xi.u, ctx->Htable, in, len)
#endif
#endif

GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) {
  GCM128_CONTEXT *ret;

  ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT));
  if (ret != NULL) {
    CRYPTO_gcm128_init(ret, key, block);
  }

  return ret;
}
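
/* A minimal usage sketch, assuming AES as the block cipher: seal one message
 * with a 96-bit IV and a 16-byte tag. The helper name is illustrative, error
 * handling and key wiping are omitted, and the block is kept under "#if 0"
 * as documentation rather than compiled code. AES_encrypt has the block128_f
 * shape, so the cast mirrors what callers of this API do in practice. */
#if 0
#include <openssl/aes.h>

static int gcm128_seal_example(const uint8_t key[16], const uint8_t iv[12],
                               const uint8_t *aad, size_t aad_len,
                               const uint8_t *in, uint8_t *out, size_t len,
                               uint8_t tag[16]) {
  AES_KEY ks;
  GCM128_CONTEXT *ctx;

  AES_set_encrypt_key(key, 128, &ks);
  ctx = CRYPTO_gcm128_new(&ks, (block128_f)AES_encrypt);
  if (ctx == NULL) {
    return 0;
  }
  CRYPTO_gcm128_setiv(ctx, iv, 12); /* 96-bit IVs take the fast path. */
  if (!CRYPTO_gcm128_aad(ctx, aad, aad_len) ||
      !CRYPTO_gcm128_encrypt(ctx, in, out, len)) {
    CRYPTO_gcm128_release(ctx);
    return 0;
  }
  CRYPTO_gcm128_tag(ctx, tag, 16); /* Verifiers call CRYPTO_gcm128_finish. */
  CRYPTO_gcm128_release(ctx);
  return 1;
}
#endif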

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) {
  const union {
    long one;
    char little;
  } is_endian = {1};

  memset(ctx, 0, sizeof(*ctx));
  ctx->block = block;
  ctx->key = key;

  (*block)(ctx->H.c, ctx->H.c, key);

  if (is_endian.little) {
/* H is stored in host byte order */
#ifdef BSWAP8
    ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
    ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
    uint8_t *p = ctx->H.c;
    uint64_t hi, lo;
    hi = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    lo = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
    ctx->H.u[0] = hi;
    ctx->H.u[1] = lo;
#endif
  }

#if defined(GHASH_ASM_X86_OR_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
      gcm_init_avx(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_avx;
      ctx->ghash = gcm_ghash_avx;
    } else {
      gcm_init_clmul(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_clmul;
      ctx->ghash = gcm_ghash_clmul;
    }
    return;
  }
  gcm_init_4bit(ctx->Htable, ctx->H.u);
#if defined(GHASH_ASM_X86) /* x86 only */
  if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
    ctx->gmult = gcm_gmult_4bit_mmx;
    ctx->ghash = gcm_ghash_4bit_mmx;
  } else {
    ctx->gmult = gcm_gmult_4bit_x86;
    ctx->ghash = gcm_ghash_4bit_x86;
  }
#else
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
#endif
#elif defined(GHASH_ASM_ARM)
  if (CRYPTO_is_NEON_capable()) {
    gcm_init_neon(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_neon;
    ctx->ghash = gcm_ghash_neon;
  } else {
    gcm_init_4bit(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
  }
#else
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
  gcm_init_4bit(ctx->Htable, ctx->H.u);
#endif
}

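/* CRYPTO_gcm128_setiv computes the pre-counter block Y0 (J0 in NIST SP
 * 800-38D): a 96-bit IV is used directly with the 32-bit counter set to 1,
 * while any other length is padded and GHASHed together with the IV's bit
 * length. EK0, the encryption of Y0, is kept for the final tag. */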
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const uint8_t *iv, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0; /* AAD length */
  ctx->len.u[1] = 0; /* message length */
  ctx->ares = 0;
  ctx->mres = 0;

  if (len == 12) {
    memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    size_t i;
    uint64_t len0 = len;

    while (len >= 16) {
      for (i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    if (is_endian.little) {
#ifdef BSWAP8
      ctx->Yi.u[1] ^= BSWAP8(len0);
#else
      ctx->Yi.c[8] ^= (uint8_t)(len0 >> 56);
      ctx->Yi.c[9] ^= (uint8_t)(len0 >> 48);
      ctx->Yi.c[10] ^= (uint8_t)(len0 >> 40);
      ctx->Yi.c[11] ^= (uint8_t)(len0 >> 32);
      ctx->Yi.c[12] ^= (uint8_t)(len0 >> 24);
      ctx->Yi.c[13] ^= (uint8_t)(len0 >> 16);
      ctx->Yi.c[14] ^= (uint8_t)(len0 >> 8);
      ctx->Yi.c[15] ^= (uint8_t)(len0);
#endif
    } else {
      ctx->Yi.u[1] ^= len0;
    }

    GCM_MUL(ctx, Yi);

    if (is_endian.little) {
      ctr = GETU32(ctx->Yi.c + 12);
    } else {
      ctr = ctx->Yi.d[3];
    }
  }

  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key);
  ++ctr;
  if (is_endian.little) {
    PUTU32(ctx->Yi.c + 12, ctr);
  } else {
    ctx->Yi.d[3] = ctr;
  }
}

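/* Additional authenticated data must be supplied before any message data;
 * the ctx->len.u[1] check below enforces this. ctx->ares tracks how many
 * bytes of a partial GHASH block are buffered in Xi between calls. */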
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  size_t i;
  unsigned int n;
  uint64_t alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

  alen += len;
  if (alen > (OPENSSL_U64(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

#ifdef GHASH
  if ((i = (len & (size_t)-16))) {
    GHASH(ctx, aad, i);
    aad += i;
    len -= i;
  }
#else
  while (len >= 16) {
    for (i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
    GCM_MUL(ctx, Xi);
    aad += 16;
    len -= 16;
  }
#endif
  if (len) {
    n = (unsigned int)len;
    for (i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

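/* Encryption is CTR mode followed by GHASH over the ciphertext: each
 * keystream block EKi = E(K, Yi) is XORed into the plaintext and the result
 * is folded into Xi. ctx->mres carries the offset into a partially used
 * keystream block so calls may be made with arbitrary lengths. */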
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
                          unsigned char *out, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  if ((i = (len & (size_t)-16))) {
    size_t j = i;

    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - j, j);
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

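/* Decryption differs only in the order of operations: the ciphertext is
 * folded into Xi before it is XORed with the keystream, so the same Xi
 * value is authenticated on both sides. */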
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
                          unsigned char *out, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      uint8_t c;
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      c = in[i];
      out[i] = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  if ((i = (len & (size_t)-16))) {
    GHASH(ctx, in, i);
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      size_t c = in_t[i];
      out_t[i] = c ^ ctx->EKi.t[i];
      ctx->Xi.t[i] ^= c;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

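/* The ctr32 variants take a ctr128_f that encrypts many counter blocks per
 * call (typically a hardware-accelerated CTR routine) instead of invoking
 * ctx->block once per 16 bytes. Only the low 32 bits of the counter are
 * incremented, matching GCM's 32-bit counter arithmetic. */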
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const uint8_t *in,
                                uint8_t *out, size_t len, ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  if ((i = (len & (size_t)-16))) {
    size_t j = i / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    in += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx, out, i);
    out += i;
#else
    while (j--) {
      for (i = 0; i < 16; ++i) {
        ctx->Xi.c[i] ^= out[i];
      }
      GCM_MUL(ctx, Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

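/* As with CRYPTO_gcm128_decrypt, the stream-based decrypt hashes the
 * ciphertext input before handing it to the CTR routine. */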
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const uint8_t *in,
                                uint8_t *out, size_t len, ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  if ((i = (len & (size_t)-16))) {
    size_t j = i / 16;

#if defined(GHASH)
    GHASH(ctx, in, i);
#else
    while (j--) {
      size_t k;
      for (k = 0; k < 16; ++k) {
        ctx->Xi.c[k] ^= in[k];
      }
      GCM_MUL(ctx, Xi);
      in += 16;
    }
    j = i / 16;
    in -= i;
#endif
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    out += i;
    in += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

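/* CRYPTO_gcm128_finish flushes any buffered partial block, appends the
 * 64-bit bit lengths of the AAD and the message, performs the final
 * multiplication, and XORs in EK0 to produce the tag, which is compared
 * against the caller's tag in constant time via CRYPTO_memcmp. */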
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  uint64_t alen = ctx->len.u[0] << 3;
  uint64_t clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  if (is_endian.little) {
#ifdef BSWAP8
    alen = BSWAP8(alen);
    clen = BSWAP8(clen);
#else
    uint8_t *p = ctx->len.c;

    ctx->len.u[0] = alen;
    ctx->len.u[1] = clen;

    alen = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    clen = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
  }

  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  memcpy(tag, ctx->Xi.c, len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) {
  if (ctx) {
    OPENSSL_cleanse(ctx, sizeof(*ctx));
    OPENSSL_free(ctx);
  }
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  return OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
         OPENSSL_ia32cap_P[1] & (1 << 1);    /* check PCLMULQDQ bit */
#else
  return 0;
#endif
}
#endif