Home | History | Annotate | Download | only in crypto
      1 /* Copyright (C) 1995-1998 Eric Young (eay (at) cryptsoft.com)
      2  * All rights reserved.
      3  *
      4  * This package is an SSL implementation written
      5  * by Eric Young (eay (at) cryptsoft.com).
      6  * The implementation was written so as to conform with Netscapes SSL.
      7  *
      8  * This library is free for commercial and non-commercial use as long as
      9  * the following conditions are aheared to.  The following conditions
     10  * apply to all code found in this distribution, be it the RC4, RSA,
     11  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
     12  * included with this distribution is covered by the same copyright terms
     13  * except that the holder is Tim Hudson (tjh (at) cryptsoft.com).
     14  *
     15  * Copyright remains Eric Young's, and as such any Copyright notices in
     16  * the code are not to be removed.
     17  * If this package is used in a product, Eric Young should be given attribution
     18  * as the author of the parts of the library used.
     19  * This can be in the form of a textual message at program startup or
     20  * in documentation (online or textual) provided with the package.
     21  *
     22  * Redistribution and use in source and binary forms, with or without
     23  * modification, are permitted provided that the following conditions
     24  * are met:
     25  * 1. Redistributions of source code must retain the copyright
     26  *    notice, this list of conditions and the following disclaimer.
     27  * 2. Redistributions in binary form must reproduce the above copyright
     28  *    notice, this list of conditions and the following disclaimer in the
     29  *    documentation and/or other materials provided with the distribution.
     30  * 3. All advertising materials mentioning features or use of this software
     31  *    must display the following acknowledgement:
     32  *    "This product includes cryptographic software written by
     33  *     Eric Young (eay (at) cryptsoft.com)"
     34  *    The word 'cryptographic' can be left out if the rouines from the library
     35  *    being used are not cryptographic related :-).
     36  * 4. If you include any Windows specific code (or a derivative thereof) from
     37  *    the apps directory (application code) you must include an acknowledgement:
     38  *    "This product includes software written by Tim Hudson (tjh (at) cryptsoft.com)"
     39  *
     40  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
     41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     50  * SUCH DAMAGE.
     51  *
     52  * The licence and distribution terms for any publically available version or
     53  * derivative of this code cannot be changed.  i.e. this code cannot simply be
     54  * copied and put under another distribution licence
     55  * [including the GNU Public Licence.] */
     56 
     57 #if !defined(__STDC_FORMAT_MACROS)
     58 #define __STDC_FORMAT_MACROS
     59 #endif
     60 
     61 #include <openssl/cpu.h>
     62 
     63 
     64 #if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_X86) || defined(OPENSSL_X86_64))
     65 
     66 #include <inttypes.h>
     67 #include <stdio.h>
     68 #include <stdlib.h>
     69 #include <string.h>
     70 
     71 #if defined(_MSC_VER)
     72 OPENSSL_MSVC_PRAGMA(warning(push, 3))
     73 #include <immintrin.h>
     74 #include <intrin.h>
     75 OPENSSL_MSVC_PRAGMA(warning(pop))
     76 #endif
     77 
     78 #include "internal.h"
     79 
     80 
// OPENSSL_cpuid runs the cpuid instruction. |leaf| is passed in as EAX and ECX
// is set to zero. It writes EAX, EBX, ECX, and EDX to |*out_eax| through
// |*out_edx|.
//
// One of three implementations is selected at compile time: the MSVC
// intrinsic, a 32-bit PIC-safe inline assembly sequence, or plain inline
// assembly for every other configuration.
static void OPENSSL_cpuid(uint32_t *out_eax, uint32_t *out_ebx,
                          uint32_t *out_ecx, uint32_t *out_edx, uint32_t leaf) {
#if defined(_MSC_VER)
  // The intrinsic fills a four-element array, which is copied out in
  // EAX, EBX, ECX, EDX order.
  // NOTE(review): this path does not set ECX itself; it relies on the
  // behaviour of the |__cpuid| intrinsic -- confirm against the MSVC docs.
  int tmp[4];
  __cpuid(tmp, (int)leaf);
  *out_eax = (uint32_t)tmp[0];
  *out_ebx = (uint32_t)tmp[1];
  *out_ecx = (uint32_t)tmp[2];
  *out_edx = (uint32_t)tmp[3];
#elif defined(__pic__) && defined(OPENSSL_32_BIT)
  // Inline assembly may not clobber the PIC register. For 32-bit, this is EBX.
  // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602.
  //
  // EBX is therefore stashed in EDI before cpuid runs. The trailing xchg puts
  // the saved value back into EBX and leaves cpuid's EBX result in EDI, which
  // is why |*out_ebx| is bound to the "D" (EDI) constraint rather than "b".
  __asm__ volatile (
    "xor %%ecx, %%ecx\n"
    "mov %%ebx, %%edi\n"
    "cpuid\n"
    "xchg %%edi, %%ebx\n"
    : "=a"(*out_eax), "=D"(*out_ebx), "=c"(*out_ecx), "=d"(*out_edx)
    : "a"(leaf)
  );
#else
  // No PIC restriction applies here: zero ECX, run cpuid, and bind each output
  // register directly to its destination.
  __asm__ volatile (
    "xor %%ecx, %%ecx\n"
    "cpuid\n"
    : "=a"(*out_eax), "=b"(*out_ebx), "=c"(*out_ecx), "=d"(*out_edx)
    : "a"(leaf)
  );
#endif
}
    113 
    114 // OPENSSL_xgetbv returns the value of an Intel Extended Control Register (XCR).
    115 // Currently only XCR0 is defined by Intel so |xcr| should always be zero.
    116 static uint64_t OPENSSL_xgetbv(uint32_t xcr) {
    117 #if defined(_MSC_VER)
    118   return (uint64_t)_xgetbv(xcr);
    119 #else
    120   uint32_t eax, edx;
    121   __asm__ volatile ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
    122   return (((uint64_t)edx) << 32) | eax;
    123 #endif
    124 }
    125 
    126 // handle_cpu_env applies the value from |in| to the CPUID values in |out[0]|
    127 // and |out[1]|. See the comment in |OPENSSL_cpuid_setup| about this.
    128 static void handle_cpu_env(uint32_t *out, const char *in) {
    129   const int invert = in[0] == '~';
    130   uint64_t v;
    131 
    132   if (!sscanf(in + invert, "%" PRIu64, &v)) {
    133     return;
    134   }
    135 
    136   if (invert) {
    137     out[0] &= ~v;
    138     out[1] &= ~(v >> 32);
    139   } else {
    140     out[0] = v;
    141     out[1] = v >> 32;
    142   }
    143 }
    144 
    145 void OPENSSL_cpuid_setup(void) {
    146   // Determine the vendor and maximum input value.
    147   uint32_t eax, ebx, ecx, edx;
    148   OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0);
    149 
    150   uint32_t num_ids = eax;
    151 
    152   int is_intel = ebx == 0x756e6547 /* Genu */ &&
    153                  edx == 0x49656e69 /* ineI */ &&
    154                  ecx == 0x6c65746e /* ntel */;
    155   int is_amd = ebx == 0x68747541 /* Auth */ &&
    156                edx == 0x69746e65 /* enti */ &&
    157                ecx == 0x444d4163 /* cAMD */;
    158 
    159   int has_amd_xop = 0;
    160   if (is_amd) {
    161     // AMD-specific logic.
    162     // See http://developer.amd.com/wordpress/media/2012/10/254811.pdf
    163     OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000000);
    164     uint32_t num_extended_ids = eax;
    165     if (num_extended_ids >= 0x80000001) {
    166       OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000001);
    167       if (ecx & (1u << 11)) {
    168         has_amd_xop = 1;
    169       }
    170     }
    171   }
    172 
    173   uint32_t extended_features = 0;
    174   if (num_ids >= 7) {
    175     OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 7);
    176     extended_features = ebx;
    177   }
    178 
    179   // Determine the number of cores sharing an L1 data cache to adjust the
    180   // hyper-threading bit.
    181   uint32_t cores_per_cache = 0;
    182   if (is_amd) {
    183     // AMD CPUs never share an L1 data cache between threads but do set the HTT
    184     // bit on multi-core CPUs.
    185     cores_per_cache = 1;
    186   } else if (num_ids >= 4) {
    187     // TODO(davidben): The Intel manual says this CPUID leaf enumerates all
    188     // caches using ECX and doesn't say which is first. Does this matter?
    189     OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 4);
    190     cores_per_cache = 1 + ((eax >> 14) & 0xfff);
    191   }
    192 
    193   OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1);
    194 
    195   // Adjust the hyper-threading bit.
    196   if (edx & (1u << 28)) {
    197     uint32_t num_logical_cores = (ebx >> 16) & 0xff;
    198     if (cores_per_cache == 1 || num_logical_cores <= 1) {
    199       edx &= ~(1u << 28);
    200     }
    201   }
    202 
    203   // Reserved bit #20 was historically repurposed to control the in-memory
    204   // representation of RC4 state. Always set it to zero.
    205   edx &= ~(1u << 20);
    206 
    207   // Reserved bit #30 is repurposed to signal an Intel CPU.
    208   if (is_intel) {
    209     edx |= (1u << 30);
    210 
    211     // Clear the XSAVE bit on Knights Landing to mimic Silvermont. This enables
    212     // some Silvermont-specific codepaths which perform better. See OpenSSL
    213     // commit 64d92d74985ebb3d0be58a9718f9e080a14a8e7f.
    214     if ((eax & 0x0fff0ff0) == 0x00050670 /* Knights Landing */ ||
    215         (eax & 0x0fff0ff0) == 0x00080650 /* Knights Mill (per SDE) */) {
    216       ecx &= ~(1u << 26);
    217     }
    218   } else {
    219     edx &= ~(1u << 30);
    220   }
    221 
    222   // The SDBG bit is repurposed to denote AMD XOP support.
    223   if (has_amd_xop) {
    224     ecx |= (1u << 11);
    225   } else {
    226     ecx &= ~(1u << 11);
    227   }
    228 
    229   uint64_t xcr0 = 0;
    230   if (ecx & (1u << 27)) {
    231     // XCR0 may only be queried if the OSXSAVE bit is set.
    232     xcr0 = OPENSSL_xgetbv(0);
    233   }
    234   // See Intel manual, volume 1, section 14.3.
    235   if ((xcr0 & 6) != 6) {
    236     // YMM registers cannot be used.
    237     ecx &= ~(1u << 28);  // AVX
    238     ecx &= ~(1u << 12);  // FMA
    239     ecx &= ~(1u << 11);  // AMD XOP
    240     // Clear AVX2 and AVX512* bits.
    241     //
    242     // TODO(davidben): Should bits 17 and 26-28 also be cleared? Upstream
    243     // doesn't clear those.
    244     extended_features &=
    245         ~((1u << 5) | (1u << 16) | (1u << 21) | (1u << 30) | (1u << 31));
    246   }
    247   // See Intel manual, volume 1, section 15.2.
    248   if ((xcr0 & 0xe6) != 0xe6) {
    249     // Clear AVX512F. Note we don't touch other AVX512 extensions because they
    250     // can be used with YMM.
    251     extended_features &= ~(1u << 16);
    252   }
    253 
    254   // Disable ADX instructions on Knights Landing. See OpenSSL commit
    255   // 64d92d74985ebb3d0be58a9718f9e080a14a8e7f.
    256   if ((ecx & (1u << 26)) == 0) {
    257     extended_features &= ~(1u << 19);
    258   }
    259 
    260   OPENSSL_ia32cap_P[0] = edx;
    261   OPENSSL_ia32cap_P[1] = ecx;
    262   OPENSSL_ia32cap_P[2] = extended_features;
    263   OPENSSL_ia32cap_P[3] = 0;
    264 
    265   const char *env1, *env2;
    266   env1 = getenv("OPENSSL_ia32cap");
    267   if (env1 == NULL) {
    268     return;
    269   }
    270 
    271   // OPENSSL_ia32cap can contain zero, one or two values, separated with a ':'.
    272   // Each value is a 64-bit, unsigned value which may start with "0x" to
    273   // indicate a hex value. Prior to the 64-bit value, a '~' may be given.
    274   //
    275   // If '~' isn't present, then the value is taken as the result of the CPUID.
    276   // Otherwise the value is inverted and ANDed with the probed CPUID result.
    277   //
    278   // The first value determines OPENSSL_ia32cap_P[0] and [1]. The second [2]
    279   // and [3].
    280 
    281   handle_cpu_env(&OPENSSL_ia32cap_P[0], env1);
    282   env2 = strchr(env1, ':');
    283   if (env2 != NULL) {
    284     handle_cpu_env(&OPENSSL_ia32cap_P[2], env2 + 1);
    285   }
    286 }
    287 
    288 #endif  // !OPENSSL_NO_ASM && (OPENSSL_X86 || OPENSSL_X86_64)
    289