Home | History | Annotate | Download | only in src
      1 // Copyright 2018, VIXL authors
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are met:
      6 //
      7 //   * Redistributions of source code must retain the above copyright notice,
      8 //     this list of conditions and the following disclaimer.
      9 //   * Redistributions in binary form must reproduce the above copyright notice,
     10 //     this list of conditions and the following disclaimer in the documentation
     11 //     and/or other materials provided with the distribution.
     12 //   * Neither the name of ARM Limited nor the names of its contributors may be
     13 //     used to endorse or promote products derived from this software without
     14 //     specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
     17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
     20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #ifndef VIXL_CPU_FEATURES_H
     28 #define VIXL_CPU_FEATURES_H
     29 
     30 #include <ostream>
     31 
     32 #include "globals-vixl.h"
     33 
     34 namespace vixl {
     35 
     36 
     37 // clang-format off
     38 #define VIXL_CPU_FEATURE_LIST(V)                                               \
     39   /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_*  */ \
     40   /* registers, so that the detailed feature registers can be read          */ \
     41   /* directly.                                                              */ \
     42   V(kIDRegisterEmulation, "ID register emulation",  "cpuid")                   \
     43                                                                                \
     44   V(kFP,                  "FP",                     "fp")                      \
     45   V(kNEON,                "NEON",                   "asimd")                   \
     46   V(kCRC32,               "CRC32",                  "crc32")                   \
     47   /* Cryptographic support instructions.                                    */ \
     48   V(kAES,                 "AES",                    "aes")                     \
     49   V(kSHA1,                "SHA1",                   "sha1")                    \
     50   V(kSHA2,                "SHA2",                   "sha2")                    \
     51   /* A form of PMULL{2} with a 128-bit (1Q) result.                         */ \
     52   V(kPmull1Q,             "Pmull1Q",                "pmull")                   \
     53   /* Atomic operations on memory: CAS, LDADD, STADD, SWP, etc.              */ \
     54   V(kAtomics,             "Atomics",                "atomics")                 \
     55   /* Limited ordering regions: LDLAR, STLLR and their variants.             */ \
     56   V(kLORegions,           "LORegions",              NULL)                      \
     57   /* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH.        */ \
     58   V(kRDM,                 "RDM",                    "asimdrdm")                \
     59   /* SDOT and UDOT support (in NEON).                                       */ \
     60   V(kDotProduct,          "DotProduct",             "asimddp")                 \
     61   /* Half-precision (FP16) support for FP and NEON, respectively.           */ \
     62   V(kFPHalf,              "FPHalf",                 "fphp")                    \
     63   V(kNEONHalf,            "NEONHalf",               "asimdhp")                 \
     64   /* The RAS extension, including the ESB instruction.                      */ \
     65   V(kRAS,                 "RAS",                    NULL)                      \
     66   /* Data cache clean to the point of persistence: DC CVAP.                 */ \
     67   V(kDCPoP,               "DCPoP",                  "dcpop")                   \
     68   /* Cryptographic support instructions.                                    */ \
     69   V(kSHA3,                "SHA3",                   "sha3")                    \
     70   V(kSHA512,              "SHA512",                 "sha512")                  \
     71   V(kSM3,                 "SM3",                    "sm3")                     \
     72   V(kSM4,                 "SM4",                    "sm4")                     \
     73   /* Pointer authentication for addresses.                                  */ \
     74   V(kPAuth,               "PAuth",                  NULL)                      \
     75   /* Pointer authentication for addresses uses QARMA.                       */ \
     76   V(kPAuthQARMA,          "PAuthQARMA",             NULL)                      \
     77   /* Generic authentication (using the PACGA instruction).                  */ \
     78   V(kPAuthGeneric,        "PAuthGeneric",           NULL)                      \
     79   /* Generic authentication uses QARMA.                                     */ \
     80   V(kPAuthGenericQARMA,   "PAuthGenericQARMA",      NULL)                      \
     81   /* JavaScript-style FP <-> integer conversion instruction: FJCVTZS.       */ \
     82   V(kJSCVT,               "JSCVT",                  "jscvt")                   \
     83   /* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \
     84   V(kRCpc,                "RCpc",                   "lrcpc")                   \
     85   /* Complex number support for NEON: FCMLA and FCADD.                      */ \
     86   V(kFcma,                "Fcma",                   "fcma")
     87 // clang-format on
     88 
     89 
     90 class CPUFeaturesConstIterator;
     91 
     92 // A representation of the set of features known to be supported by the target
     93 // device. Each feature is represented by a simple boolean flag.
     94 //
     95 //   - When the Assembler is asked to assemble an instruction, it asserts (in
     96 //     debug mode) that the necessary features are available.
     97 //
     98 //   - TODO: The MacroAssembler relies on the Assembler's assertions, but in
     99 //     some cases it may be useful for macros to generate a fall-back sequence
    100 //     in case features are not available.
    101 //
    102 //   - The Simulator assumes by default that all features are available, but it
    103 //     is possible to configure it to fail if the simulated code uses features
    104 //     that are not enabled.
    105 //
    106 //     The Simulator also offers pseudo-instructions to allow features to be
    107 //     enabled and disabled dynamically. This is useful when you want to ensure
    108 //     that some features are constrained to certain areas of code.
    109 //
    110 //   - The base Disassembler knows nothing about CPU features, but the
    111 //     PrintDisassembler can be configured to annotate its output with warnings
    112 //     about unavailable features. The Simulator uses this feature when
    113 //     instruction trace is enabled.
    114 //
    115 //   - The Decoder-based components -- the Simulator and PrintDisassembler --
    116 //     rely on a CPUFeaturesAuditor visitor. This visitor keeps a list of
    117 //     features actually encountered so that a large block of code can be
    118 //     examined (either directly or through simulation), and the required
    119 //     features analysed later.
    120 //
    121 // Expected usage:
    122 //
    123 //     // By default, VIXL uses CPUFeatures::AArch64LegacyBaseline(), for
    124 //     // compatibility with older version of VIXL.
    125 //     MacroAssembler masm;
    126 //
    127 //     // Generate code only for the current CPU.
    128 //     masm.SetCPUFeatures(CPUFeatures::InferFromOS());
    129 //
    130 //     // Turn off feature checking entirely.
    131 //     masm.SetCPUFeatures(CPUFeatures::All());
    132 //
    133 // Feature set manipulation:
    134 //
    135 //     CPUFeatures f;  // The default constructor gives an empty set.
    136 //     // Individual features can be added (or removed).
    137 //     f.Combine(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::AES);
    138 //     f.Remove(CPUFeatures::kNEON);
    139 //
    140 //     // Some helpers exist for extensions that provide several features.
    141 //     f.Remove(CPUFeatures::All());
    142 //     f.Combine(CPUFeatures::AArch64LegacyBaseline());
    143 //
    144 //     // Chained construction is also possible.
    145 //     CPUFeatures g =
    146 //         f.With(CPUFeatures::kPmull1Q).Without(CPUFeatures::kCRC32);
    147 //
    148 //     // Features can be queried. Where multiple features are given, they are
    149 //     // combined with logical AND.
    150 //     if (h.Has(CPUFeatures::kNEON)) { ... }
    151 //     if (h.Has(CPUFeatures::kFP, CPUFeatures::kNEON)) { ... }
    152 //     if (h.Has(g)) { ... }
    153 //     // If the empty set is requested, the result is always 'true'.
    154 //     VIXL_ASSERT(h.Has(CPUFeatures()));
    155 //
    156 //     // For debug and reporting purposes, features can be enumerated (or
    157 //     // printed directly):
    158 //     std::cout << CPUFeatures::kNEON;  // Prints something like "NEON".
    159 //     std::cout << f;  // Prints something like "FP, NEON, CRC32".
    160 class CPUFeatures {
    161  public:
    162   // clang-format off
    163   // Individual features.
    164   // These should be treated as opaque tokens. User code should not rely on
    165   // specific numeric values or ordering.
    166   enum Feature {
    167     // Refer to VIXL_CPU_FEATURE_LIST (above) for the list of feature names that
    168     // this class supports.
    169 
    170     kNone = -1,
    171 #define VIXL_DECLARE_FEATURE(SYMBOL, NAME, CPUINFO) SYMBOL,
    172     VIXL_CPU_FEATURE_LIST(VIXL_DECLARE_FEATURE)
    173 #undef VIXL_DECLARE_FEATURE
    174     kNumberOfFeatures
    175   };
    176   // clang-format on
    177 
    178   // By default, construct with no features enabled.
    179   CPUFeatures() : features_(0) {}
    180 
    181   // Construct with some features already enabled.
    182   CPUFeatures(Feature feature0,
    183               Feature feature1 = kNone,
    184               Feature feature2 = kNone,
    185               Feature feature3 = kNone);
    186 
    187   // Construct with all features enabled. This can be used to disable feature
    188   // checking: `Has(...)` returns true regardless of the argument.
    189   static CPUFeatures All();
    190 
    191   // Construct an empty CPUFeatures. This is equivalent to the default
    192   // constructor, but is provided for symmetry and convenience.
    193   static CPUFeatures None() { return CPUFeatures(); }
    194 
    195   // The presence of these features was assumed by version of VIXL before this
    196   // API was added, so using this set by default ensures API compatibility.
    197   static CPUFeatures AArch64LegacyBaseline() {
    198     return CPUFeatures(kFP, kNEON, kCRC32);
    199   }
    200 
    201   // Construct a new CPUFeatures object based on what the OS reports.
    202   static CPUFeatures InferFromOS();
    203 
    204   // Combine another CPUFeatures object into this one. Features that already
    205   // exist in this set are left unchanged.
    206   void Combine(const CPUFeatures& other);
    207 
    208   // Combine specific features into this set. Features that already exist in
    209   // this set are left unchanged.
    210   void Combine(Feature feature0,
    211                Feature feature1 = kNone,
    212                Feature feature2 = kNone,
    213                Feature feature3 = kNone);
    214 
    215   // Remove features in another CPUFeatures object from this one.
    216   void Remove(const CPUFeatures& other);
    217 
    218   // Remove specific features from this set.
    219   void Remove(Feature feature0,
    220               Feature feature1 = kNone,
    221               Feature feature2 = kNone,
    222               Feature feature3 = kNone);
    223 
    224   // Chaining helpers for convenient construction.
    225   CPUFeatures With(const CPUFeatures& other) const;
    226   CPUFeatures With(Feature feature0,
    227                    Feature feature1 = kNone,
    228                    Feature feature2 = kNone,
    229                    Feature feature3 = kNone) const;
    230   CPUFeatures Without(const CPUFeatures& other) const;
    231   CPUFeatures Without(Feature feature0,
    232                       Feature feature1 = kNone,
    233                       Feature feature2 = kNone,
    234                       Feature feature3 = kNone) const;
    235 
    236   // Query features.
    237   // Note that an empty query (like `Has(kNone)`) always returns true.
    238   bool Has(const CPUFeatures& other) const;
    239   bool Has(Feature feature0,
    240            Feature feature1 = kNone,
    241            Feature feature2 = kNone,
    242            Feature feature3 = kNone) const;
    243 
    244   // Return the number of enabled features.
    245   size_t Count() const;
    246 
    247   // Check for equivalence.
    248   bool operator==(const CPUFeatures& other) const {
    249     return Has(other) && other.Has(*this);
    250   }
    251   bool operator!=(const CPUFeatures& other) const { return !(*this == other); }
    252 
    253   typedef CPUFeaturesConstIterator const_iterator;
    254 
    255   const_iterator begin() const;
    256   const_iterator end() const;
    257 
    258  private:
    259   // Each bit represents a feature. This field will be replaced as needed if
    260   // features are added.
    261   uint64_t features_;
    262 
    263   friend std::ostream& operator<<(std::ostream& os,
    264                                   const vixl::CPUFeatures& features);
    265 };
    266 
    267 std::ostream& operator<<(std::ostream& os, vixl::CPUFeatures::Feature feature);
    268 std::ostream& operator<<(std::ostream& os, const vixl::CPUFeatures& features);
    269 
    270 // This is not a proper C++ iterator type, but it simulates enough of
    271 // ForwardIterator that simple loops can be written.
    272 class CPUFeaturesConstIterator {
    273  public:
    274   CPUFeaturesConstIterator(const CPUFeatures* cpu_features = NULL,
    275                            CPUFeatures::Feature start = CPUFeatures::kNone)
    276       : cpu_features_(cpu_features), feature_(start) {
    277     VIXL_ASSERT(IsValid());
    278   }
    279 
    280   bool operator==(const CPUFeaturesConstIterator& other) const;
    281   bool operator!=(const CPUFeaturesConstIterator& other) const {
    282     return !(*this == other);
    283   }
    284   CPUFeatures::Feature operator++();
    285   CPUFeatures::Feature operator++(int);
    286 
    287   CPUFeatures::Feature operator*() const {
    288     VIXL_ASSERT(IsValid());
    289     return feature_;
    290   }
    291 
    292   // For proper support of C++'s simplest "Iterator" concept, this class would
    293   // have to define member types (such as CPUFeaturesIterator::pointer) to make
    294   // it appear as if it iterates over Feature objects in memory. That is, we'd
    295   // need CPUFeatures::iterator to behave like std::vector<Feature>::iterator.
    296   // This is at least partially possible -- the std::vector<bool> specialisation
    297   // does something similar -- but it doesn't seem worthwhile for a
    298   // special-purpose debug helper, so they are omitted here.
    299  private:
    300   const CPUFeatures* cpu_features_;
    301   CPUFeatures::Feature feature_;
    302 
    303   bool IsValid() const {
    304     return ((cpu_features_ == NULL) && (feature_ == CPUFeatures::kNone)) ||
    305            cpu_features_->Has(feature_);
    306   }
    307 };
    308 
    309 // A convenience scope for temporarily modifying a CPU features object. This
    310 // allows features to be enabled for short sequences.
    311 //
    312 // Expected usage:
    313 //
    314 //  {
    315 //    CPUFeaturesScope cpu(&masm, CPUFeatures::kCRC32);
    316 //    // This scope can now use CRC32, as well as anything else that was enabled
    317 //    // before the scope.
    318 //
    319 //    ...
    320 //
    321 //    // At the end of the scope, the original CPU features are restored.
    322 //  }
    323 class CPUFeaturesScope {
    324  public:
    325   // Start a CPUFeaturesScope on any object that implements
    326   // `CPUFeatures* GetCPUFeatures()`.
    327   template <typename T>
    328   explicit CPUFeaturesScope(T* cpu_features_wrapper,
    329                             CPUFeatures::Feature feature0 = CPUFeatures::kNone,
    330                             CPUFeatures::Feature feature1 = CPUFeatures::kNone,
    331                             CPUFeatures::Feature feature2 = CPUFeatures::kNone,
    332                             CPUFeatures::Feature feature3 = CPUFeatures::kNone)
    333       : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
    334         old_features_(*cpu_features_) {
    335     cpu_features_->Combine(feature0, feature1, feature2, feature3);
    336   }
    337 
    338   template <typename T>
    339   CPUFeaturesScope(T* cpu_features_wrapper, const CPUFeatures& other)
    340       : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
    341         old_features_(*cpu_features_) {
    342     cpu_features_->Combine(other);
    343   }
    344 
    345   ~CPUFeaturesScope() { *cpu_features_ = old_features_; }
    346 
    347   // For advanced usage, the CPUFeatures object can be accessed directly.
    348   // The scope will restore the original state when it ends.
    349 
    350   CPUFeatures* GetCPUFeatures() const { return cpu_features_; }
    351 
    352   void SetCPUFeatures(const CPUFeatures& cpu_features) {
    353     *cpu_features_ = cpu_features;
    354   }
    355 
    356  private:
    357   CPUFeatures* const cpu_features_;
    358   const CPUFeatures old_features_;
    359 };
    360 
    361 
    362 }  // namespace vixl
    363 
    364 #endif  // VIXL_CPU_FEATURES_H
    365