Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2010 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
     18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     21  *
     22  * The above copyright notice and this permission notice (including the
     23  * next paragraph) shall be included in all copies or substantial portions
     24  * of the Software.
     25  *
     26  **************************************************************************/
     27 
     28 
     29 /**
     30  * The purpose of this module is to expose LLVM functionality not available
     31  * through the C++ bindings.
     32  */
     33 
     34 
     35 // Undef these vars just to silence warnings
     36 #undef PACKAGE_BUGREPORT
     37 #undef PACKAGE_NAME
     38 #undef PACKAGE_STRING
     39 #undef PACKAGE_TARNAME
     40 #undef PACKAGE_VERSION
     41 
     42 
     43 #include <stddef.h>
     44 
     45 // Workaround http://llvm.org/PR23628
     46 #if HAVE_LLVM >= 0x0307
     47 #  pragma push_macro("DEBUG")
     48 #  undef DEBUG
     49 #endif
     50 
     51 #include <llvm-c/Core.h>
     52 #if HAVE_LLVM >= 0x0306
     53 #include <llvm-c/Support.h>
     54 #endif
     55 #include <llvm-c/ExecutionEngine.h>
     56 #include <llvm/Target/TargetOptions.h>
     57 #include <llvm/ExecutionEngine/ExecutionEngine.h>
     58 #include <llvm/ADT/Triple.h>
     59 #if HAVE_LLVM >= 0x0307
     60 #include <llvm/Analysis/TargetLibraryInfo.h>
     61 #else
     62 #include <llvm/Target/TargetLibraryInfo.h>
     63 #endif
     64 #if HAVE_LLVM < 0x0306
     65 #include <llvm/ExecutionEngine/JITMemoryManager.h>
     66 #else
     67 #include <llvm/ExecutionEngine/SectionMemoryManager.h>
     68 #endif
     69 #include <llvm/Support/CommandLine.h>
     70 #include <llvm/Support/Host.h>
     71 #include <llvm/Support/PrettyStackTrace.h>
     72 
     73 #include <llvm/Support/TargetSelect.h>
     74 
     75 #if HAVE_LLVM >= 0x0305
     76 #include <llvm/IR/CallSite.h>
     77 #endif
     78 #include <llvm/IR/IRBuilder.h>
     79 #include <llvm/IR/Module.h>
     80 #include <llvm/Support/CBindingWrapping.h>
     81 
     82 #include <llvm/Config/llvm-config.h>
     83 #if LLVM_USE_INTEL_JITEVENTS
     84 #include <llvm/ExecutionEngine/JITEventListener.h>
     85 #endif
     86 
     87 // Workaround http://llvm.org/PR23628
     88 #if HAVE_LLVM >= 0x0307
     89 #  pragma pop_macro("DEBUG")
     90 #endif
     91 
     92 #include "c11/threads.h"
     93 #include "os/os_thread.h"
     94 #include "pipe/p_config.h"
     95 #include "util/u_debug.h"
     96 #include "util/u_cpu_detect.h"
     97 
     98 #include "lp_bld_misc.h"
     99 #include "lp_bld_debug.h"
    100 
    101 namespace {
    102 
    103 class LLVMEnsureMultithreaded {
    104 public:
    105    LLVMEnsureMultithreaded()
    106    {
    107       if (!LLVMIsMultithreaded()) {
    108          LLVMStartMultithreaded();
    109       }
    110    }
    111 };
    112 
    113 static LLVMEnsureMultithreaded lLVMEnsureMultithreaded;
    114 
    115 }
    116 
    117 static once_flag init_native_targets_once_flag = ONCE_FLAG_INIT;
    118 
    119 static void init_native_targets()
    120 {
    121    // If we have a native target, initialize it to ensure it is linked in and
    122    // usable by the JIT.
    123    llvm::InitializeNativeTarget();
    124 
    125    llvm::InitializeNativeTargetAsmPrinter();
    126 
    127    llvm::InitializeNativeTargetDisassembler();
    128 #if DEBUG && HAVE_LLVM >= 0x0306
    129    {
    130       char *env_llc_options = getenv("GALLIVM_LLC_OPTIONS");
    131       if (env_llc_options) {
    132          char *option;
    133          char *options[64] = {(char *) "llc"};      // Warning without cast
    134          int   n;
    135          for (n = 0, option = strtok(env_llc_options, " "); option; n++, option = strtok(NULL, " ")) {
    136             options[n + 1] = option;
    137          }
    138          if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | GALLIVM_DEBUG_DUMP_BC)) {
    139             debug_printf("llc additional options (%d):\n", n);
    140             for (int i = 1; i <= n; i++)
    141                debug_printf("\t%s\n", options[i]);
    142             debug_printf("\n");
    143          }
    144          LLVMParseCommandLineOptions(n + 1, options, NULL);
    145       }
    146    }
    147 #endif
    148 }
    149 
    150 extern "C" void
    151 lp_set_target_options(void)
    152 {
    153 #if HAVE_LLVM < 0x0304
    154    /*
    155     * By default LLVM adds a signal handler to output a pretty stack trace.
    156     * This signal handler is never removed, causing problems when unloading the
    157     * shared object where the gallium driver resides.
    158     */
    159    llvm::DisablePrettyStackTrace = true;
    160 #endif
    161 
    162    /* The llvm target registry is not thread-safe, so drivers and state-trackers
    163     * that want to initialize targets should use the lp_set_target_options()
    164     * function to safely initialize targets.
    165     *
    166     * LLVM targets should be initialized before the driver or state-tracker tries
    167     * to access the registry.
    168     */
    169    call_once(&init_native_targets_once_flag, init_native_targets);
    170 }
    171 
    172 extern "C"
    173 LLVMTargetLibraryInfoRef
    174 gallivm_create_target_library_info(const char *triple)
    175 {
    176    return reinterpret_cast<LLVMTargetLibraryInfoRef>(
    177 #if HAVE_LLVM < 0x0307
    178    new llvm::TargetLibraryInfo(
    179 #else
    180    new llvm::TargetLibraryInfoImpl(
    181 #endif
    182    llvm::Triple(triple)));
    183 }
    184 
    185 extern "C"
    186 void
    187 gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
    188 {
    189    delete reinterpret_cast<
    190 #if HAVE_LLVM < 0x0307
    191    llvm::TargetLibraryInfo
    192 #else
    193    llvm::TargetLibraryInfoImpl
    194 #endif
    195    *>(library_info);
    196 }
    197 
    198 
    199 #if HAVE_LLVM < 0x0304
    200 
    201 extern "C"
    202 void
    203 LLVMSetAlignmentBackport(LLVMValueRef V,
    204                          unsigned Bytes)
    205 {
    206    switch (LLVMGetInstructionOpcode(V)) {
    207    case LLVMLoad:
    208       llvm::unwrap<llvm::LoadInst>(V)->setAlignment(Bytes);
    209       break;
    210    case LLVMStore:
    211       llvm::unwrap<llvm::StoreInst>(V)->setAlignment(Bytes);
    212       break;
    213    default:
    214       assert(0);
    215       break;
    216    }
    217 }
    218 
    219 #endif
    220 
    221 
    222 #if HAVE_LLVM < 0x0306
    223 typedef llvm::JITMemoryManager BaseMemoryManager;
    224 #else
    225 typedef llvm::RTDyldMemoryManager BaseMemoryManager;
    226 #endif
    227 
    228 
    229 /*
    230  * Delegating is tedious but the default manager class is hidden in an
    231  * anonymous namespace in LLVM, so we cannot just derive from it to change
    232  * its behavior.
    233  */
    234 class DelegatingJITMemoryManager : public BaseMemoryManager {
    235 
    236    protected:
    237       virtual BaseMemoryManager *mgr() const = 0;
    238 
    239    public:
    240 #if HAVE_LLVM < 0x0306
    241       /*
    242        * From JITMemoryManager
    243        */
    244       virtual void setMemoryWritable() {
    245          mgr()->setMemoryWritable();
    246       }
    247       virtual void setMemoryExecutable() {
    248          mgr()->setMemoryExecutable();
    249       }
    250       virtual void setPoisonMemory(bool poison) {
    251          mgr()->setPoisonMemory(poison);
    252       }
    253       virtual void AllocateGOT() {
    254          mgr()->AllocateGOT();
    255          /*
    256           * isManagingGOT() is not virtual in base class so we can't delegate.
    257           * Instead we mirror the value of HasGOT in our instance.
    258           */
    259          HasGOT = mgr()->isManagingGOT();
    260       }
    261       virtual uint8_t *getGOTBase() const {
    262          return mgr()->getGOTBase();
    263       }
    264       virtual uint8_t *startFunctionBody(const llvm::Function *F,
    265                                          uintptr_t &ActualSize) {
    266          return mgr()->startFunctionBody(F, ActualSize);
    267       }
    268       virtual uint8_t *allocateStub(const llvm::GlobalValue *F,
    269                                     unsigned StubSize,
    270                                     unsigned Alignment) {
    271          return mgr()->allocateStub(F, StubSize, Alignment);
    272       }
    273       virtual void endFunctionBody(const llvm::Function *F,
    274                                    uint8_t *FunctionStart,
    275                                    uint8_t *FunctionEnd) {
    276          mgr()->endFunctionBody(F, FunctionStart, FunctionEnd);
    277       }
    278       virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
    279          return mgr()->allocateSpace(Size, Alignment);
    280       }
    281       virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
    282          return mgr()->allocateGlobal(Size, Alignment);
    283       }
    284       virtual void deallocateFunctionBody(void *Body) {
    285          mgr()->deallocateFunctionBody(Body);
    286       }
    287 #if HAVE_LLVM < 0x0304
    288       virtual uint8_t *startExceptionTable(const llvm::Function *F,
    289                                            uintptr_t &ActualSize) {
    290          return mgr()->startExceptionTable(F, ActualSize);
    291       }
    292       virtual void endExceptionTable(const llvm::Function *F,
    293                                      uint8_t *TableStart,
    294                                      uint8_t *TableEnd,
    295                                      uint8_t *FrameRegister) {
    296          mgr()->endExceptionTable(F, TableStart, TableEnd,
    297                                   FrameRegister);
    298       }
    299       virtual void deallocateExceptionTable(void *ET) {
    300          mgr()->deallocateExceptionTable(ET);
    301       }
    302 #endif
    303       virtual bool CheckInvariants(std::string &s) {
    304          return mgr()->CheckInvariants(s);
    305       }
    306       virtual size_t GetDefaultCodeSlabSize() {
    307          return mgr()->GetDefaultCodeSlabSize();
    308       }
    309       virtual size_t GetDefaultDataSlabSize() {
    310          return mgr()->GetDefaultDataSlabSize();
    311       }
    312       virtual size_t GetDefaultStubSlabSize() {
    313          return mgr()->GetDefaultStubSlabSize();
    314       }
    315       virtual unsigned GetNumCodeSlabs() {
    316          return mgr()->GetNumCodeSlabs();
    317       }
    318       virtual unsigned GetNumDataSlabs() {
    319          return mgr()->GetNumDataSlabs();
    320       }
    321       virtual unsigned GetNumStubSlabs() {
    322          return mgr()->GetNumStubSlabs();
    323       }
    324 #endif
    325 
    326       /*
    327        * From RTDyldMemoryManager
    328        */
    329 #if HAVE_LLVM >= 0x0304
    330       virtual uint8_t *allocateCodeSection(uintptr_t Size,
    331                                            unsigned Alignment,
    332                                            unsigned SectionID,
    333                                            llvm::StringRef SectionName) {
    334          return mgr()->allocateCodeSection(Size, Alignment, SectionID,
    335                                            SectionName);
    336       }
    337 #else
    338       virtual uint8_t *allocateCodeSection(uintptr_t Size,
    339                                            unsigned Alignment,
    340                                            unsigned SectionID) {
    341          return mgr()->allocateCodeSection(Size, Alignment, SectionID);
    342       }
    343 #endif
    344       virtual uint8_t *allocateDataSection(uintptr_t Size,
    345                                            unsigned Alignment,
    346                                            unsigned SectionID,
    347 #if HAVE_LLVM >= 0x0304
    348                                            llvm::StringRef SectionName,
    349 #endif
    350                                            bool IsReadOnly) {
    351          return mgr()->allocateDataSection(Size, Alignment, SectionID,
    352 #if HAVE_LLVM >= 0x0304
    353                                            SectionName,
    354 #endif
    355                                            IsReadOnly);
    356       }
    357 #if HAVE_LLVM >= 0x0304
    358       virtual void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
    359          mgr()->registerEHFrames(Addr, LoadAddr, Size);
    360       }
    361 #else
    362       virtual void registerEHFrames(llvm::StringRef SectionData) {
    363          mgr()->registerEHFrames(SectionData);
    364       }
    365 #endif
    366 #if HAVE_LLVM >= 0x0500
    367       virtual void deregisterEHFrames() {
    368          mgr()->deregisterEHFrames();
    369       }
    370 #elif HAVE_LLVM >= 0x0304
    371       virtual void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
    372          mgr()->deregisterEHFrames(Addr, LoadAddr, Size);
    373       }
    374 #endif
    375       virtual void *getPointerToNamedFunction(const std::string &Name,
    376                                               bool AbortOnFailure=true) {
    377          return mgr()->getPointerToNamedFunction(Name, AbortOnFailure);
    378       }
    379 #if HAVE_LLVM <= 0x0303
    380       virtual bool applyPermissions(std::string *ErrMsg = 0) {
    381          return mgr()->applyPermissions(ErrMsg);
    382       }
    383 #else
    384       virtual bool finalizeMemory(std::string *ErrMsg = 0) {
    385          return mgr()->finalizeMemory(ErrMsg);
    386       }
    387 #endif
    388 };
    389 
    390 
    391 /*
    392  * Delegate memory management to one shared manager for more efficient use
    393  * of memory than creating a separate pool for each LLVM engine.
    394  * Keep generated code until freeGeneratedCode() is called, instead of when
    395  * memory manager is destroyed, which happens during engine destruction.
    396  * This allows additional memory savings as we don't have to keep the engine
    397  * around in order to use the code.
    398  * All methods are delegated to the shared manager except destruction and
    399  * deallocating code.  For the latter we just remember what needs to be
    400  * deallocated later.  The shared manager is deleted once it is empty.
    401  */
    402 class ShaderMemoryManager : public DelegatingJITMemoryManager {
    403 
    404    BaseMemoryManager *TheMM;
    405 
    406    struct GeneratedCode {
    407       typedef std::vector<void *> Vec;
    408       Vec FunctionBody, ExceptionTable;
    409       BaseMemoryManager *TheMM;
    410 
    411       GeneratedCode(BaseMemoryManager *MM) {
    412          TheMM = MM;
    413       }
    414 
    415       ~GeneratedCode() {
    416          /*
    417           * Deallocate things as previously requested and
    418           * free shared manager when no longer used.
    419           */
    420 #if HAVE_LLVM < 0x0306
    421          Vec::iterator i;
    422 
    423          assert(TheMM);
    424          for ( i = FunctionBody.begin(); i != FunctionBody.end(); ++i )
    425             TheMM->deallocateFunctionBody(*i);
    426 #if HAVE_LLVM < 0x0304
    427          for ( i = ExceptionTable.begin(); i != ExceptionTable.end(); ++i )
    428             TheMM->deallocateExceptionTable(*i);
    429 #endif /* HAVE_LLVM < 0x0304 */
    430 #endif /* HAVE_LLVM < 0x0306 */
    431       }
    432    };
    433 
    434    GeneratedCode *code;
    435 
    436    BaseMemoryManager *mgr() const {
    437       return TheMM;
    438    }
    439 
    440    public:
    441 
    442       ShaderMemoryManager(BaseMemoryManager* MM) {
    443          TheMM = MM;
    444          code = new GeneratedCode(MM);
    445       }
    446 
    447       virtual ~ShaderMemoryManager() {
    448          /*
    449           * 'code' is purposely not deleted.  It is the user's responsibility
    450           * to call getGeneratedCode() and freeGeneratedCode().
    451           */
    452       }
    453 
    454       struct lp_generated_code *getGeneratedCode() {
    455          return (struct lp_generated_code *) code;
    456       }
    457 
    458       static void freeGeneratedCode(struct lp_generated_code *code) {
    459          delete (GeneratedCode *) code;
    460       }
    461 
    462 #if HAVE_LLVM < 0x0304
    463       virtual void deallocateExceptionTable(void *ET) {
    464          // remember for later deallocation
    465          code->ExceptionTable.push_back(ET);
    466       }
    467 #endif
    468 
    469       virtual void deallocateFunctionBody(void *Body) {
    470          // remember for later deallocation
    471          code->FunctionBody.push_back(Body);
    472       }
    473 };
    474 
    475 
    476 /**
    477  * Same as LLVMCreateJITCompilerForModule, but:
    478  * - allows using MCJIT and enabling AVX feature where available.
    479  * - set target options
    480  *
    481  * See also:
    482  * - llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
    483  * - llvm/tools/lli/lli.cpp
    484  * - http://markmail.org/message/ttkuhvgj4cxxy2on#query:+page:1+mid:aju2dggerju3ivd3+state:results
    485  */
    486 extern "C"
    487 LLVMBool
    488 lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
    489                                         lp_generated_code **OutCode,
    490                                         LLVMModuleRef M,
    491                                         LLVMMCJITMemoryManagerRef CMM,
    492                                         unsigned OptLevel,
    493                                         int useMCJIT,
    494                                         char **OutError)
    495 {
    496    using namespace llvm;
    497 
    498    std::string Error;
    499 #if HAVE_LLVM >= 0x0306
    500    EngineBuilder builder(std::unique_ptr<Module>(unwrap(M)));
    501 #else
    502    EngineBuilder builder(unwrap(M));
    503 #endif
    504 
    505    /**
    506     * LLVM 3.1+ haven't more "extern unsigned llvm::StackAlignmentOverride" and
    507     * friends for configuring code generation options, like stack alignment.
    508     */
    509    TargetOptions options;
    510 #if defined(PIPE_ARCH_X86)
    511    options.StackAlignmentOverride = 4;
    512 #if HAVE_LLVM < 0x0304
    513    options.RealignStack = true;
    514 #endif
    515 #endif
    516 
    517 #if defined(DEBUG) && HAVE_LLVM < 0x0307
    518    options.JITEmitDebugInfo = true;
    519 #endif
    520 
    521    /* XXX: Workaround http://llvm.org/PR21435 */
    522 #if defined(DEBUG) || defined(PROFILE) || \
    523     (HAVE_LLVM >= 0x0303 && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)))
    524 #if HAVE_LLVM < 0x0304
    525    options.NoFramePointerElimNonLeaf = true;
    526 #endif
    527 #if HAVE_LLVM < 0x0307
    528    options.NoFramePointerElim = true;
    529 #endif
    530 #endif
    531 
    532    builder.setEngineKind(EngineKind::JIT)
    533           .setErrorStr(&Error)
    534           .setTargetOptions(options)
    535           .setOptLevel((CodeGenOpt::Level)OptLevel);
    536 
    537    if (useMCJIT) {
    538 #if HAVE_LLVM < 0x0306
    539        builder.setUseMCJIT(true);
    540 #endif
    541 #ifdef _WIN32
    542        /*
    543         * MCJIT works on Windows, but currently only through ELF object format.
    544         *
    545         * XXX: We could use `LLVM_HOST_TRIPLE "-elf"` but LLVM_HOST_TRIPLE has
    546         * different strings for MinGW/MSVC, so better play it safe and be
    547         * explicit.
    548         */
    549 #  ifdef _WIN64
    550        LLVMSetTarget(M, "x86_64-pc-win32-elf");
    551 #  else
    552        LLVMSetTarget(M, "i686-pc-win32-elf");
    553 #  endif
    554 #endif
    555    }
    556 
    557    llvm::SmallVector<std::string, 16> MAttrs;
    558 
    559 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
    560 #if HAVE_LLVM >= 0x0400
    561    /* llvm-3.7+ implements sys::getHostCPUFeatures for x86,
    562     * which allows us to enable/disable code generation based
    563     * on the results of cpuid.
    564     */
    565    llvm::StringMap<bool> features;
    566    llvm::sys::getHostCPUFeatures(features);
    567 
    568    for (StringMapIterator<bool> f = features.begin();
    569         f != features.end();
    570         ++f) {
    571       MAttrs.push_back(((*f).second ? "+" : "-") + (*f).first().str());
    572    }
    573 #else
    574    /*
    575     * We need to unset attributes because sometimes LLVM mistakenly assumes
    576     * certain features are present given the processor name.
    577     *
    578     * https://bugs.freedesktop.org/show_bug.cgi?id=92214
    579     * http://llvm.org/PR25021
    580     * http://llvm.org/PR19429
    581     * http://llvm.org/PR16721
    582     */
    583    MAttrs.push_back(util_cpu_caps.has_sse    ? "+sse"    : "-sse"   );
    584    MAttrs.push_back(util_cpu_caps.has_sse2   ? "+sse2"   : "-sse2"  );
    585    MAttrs.push_back(util_cpu_caps.has_sse3   ? "+sse3"   : "-sse3"  );
    586    MAttrs.push_back(util_cpu_caps.has_ssse3  ? "+ssse3"  : "-ssse3" );
    587 #if HAVE_LLVM >= 0x0304
    588    MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse4.1" : "-sse4.1");
    589 #else
    590    MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse41"  : "-sse41" );
    591 #endif
    592 #if HAVE_LLVM >= 0x0304
    593    MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse4.2" : "-sse4.2");
    594 #else
    595    MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse42"  : "-sse42" );
    596 #endif
    597    /*
    598     * AVX feature is not automatically detected from CPUID by the X86 target
    599     * yet, because the old (yet default) JIT engine is not capable of
    600     * emitting the opcodes. On newer llvm versions it is and at least some
    601     * versions (tested with 3.3) will emit avx opcodes without this anyway.
    602     */
    603    MAttrs.push_back(util_cpu_caps.has_avx  ? "+avx"  : "-avx");
    604    MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c");
    605    if (HAVE_LLVM >= 0x0304) {
    606       MAttrs.push_back(util_cpu_caps.has_fma  ? "+fma"  : "-fma");
    607    } else {
    608       /*
    609        * The old JIT in LLVM 3.3 has a bug encoding llvm.fmuladd.f32 and
    610        * llvm.fmuladd.v2f32 intrinsics when FMA is available.
    611        */
    612       MAttrs.push_back("-fma");
    613    }
    614    MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2");
    615    /* disable avx512 and all subvariants */
    616 #if HAVE_LLVM >= 0x0304
    617    MAttrs.push_back("-avx512cd");
    618    MAttrs.push_back("-avx512er");
    619    MAttrs.push_back("-avx512f");
    620    MAttrs.push_back("-avx512pf");
    621 #endif
    622 #if HAVE_LLVM >= 0x0305
    623    MAttrs.push_back("-avx512bw");
    624    MAttrs.push_back("-avx512dq");
    625    MAttrs.push_back("-avx512vl");
    626 #endif
    627 #endif
    628 #endif
    629 
    630 #if defined(PIPE_ARCH_PPC)
    631    MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
    632 #if (HAVE_LLVM >= 0x0304)
    633 #if (HAVE_LLVM < 0x0400)
    634    /*
    635     * Make sure VSX instructions are disabled
    636     * See LLVM bugs:
    637     * https://llvm.org/bugs/show_bug.cgi?id=25503#c7 (fixed in 3.8.1)
    638     * https://llvm.org/bugs/show_bug.cgi?id=26775 (fixed in 3.8.1)
    639     * https://llvm.org/bugs/show_bug.cgi?id=33531 (fixed in 4.0)
    640     * https://llvm.org/bugs/show_bug.cgi?id=34647 (llc performance on certain unusual shader IR; intro'd in 4.0, pending as of 5.0)
    641     */
    642    if (util_cpu_caps.has_altivec) {
    643       MAttrs.push_back("-vsx");
    644    }
    645 #else
    646    /*
    647     * Bug 25503 is fixed, by the same fix that fixed
    648     * bug 26775, in versions of LLVM later than 3.8 (starting with 3.8.1).
    649     * BZ 33531 actually comprises more than one bug, all of
    650     * which are fixed in LLVM 4.0.
    651     *
    652     * With LLVM 4.0 or higher:
    653     * Make sure VSX instructions are ENABLED, unless
    654     * a) the entire -mattr option is overridden via GALLIVM_MATTRS, or
    655     * b) VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 or 0.
    656     */
    657    if (util_cpu_caps.has_altivec) {
    658       char *env_mattrs = getenv("GALLIVM_MATTRS");
    659       if (env_mattrs) {
    660          MAttrs.push_back(env_mattrs);
    661       }
    662       else {
    663          boolean enable_vsx = true;
    664          char *env_vsx = getenv("GALLIVM_VSX");
    665          if (env_vsx && env_vsx[0] == '0') {
    666             enable_vsx = false;
    667          }
    668          if (enable_vsx)
    669             MAttrs.push_back("+vsx");
    670          else
    671             MAttrs.push_back("-vsx");
    672       }
    673    }
    674 #endif
    675 #endif
    676 #endif
    677 
    678    builder.setMAttrs(MAttrs);
    679 
    680    if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | GALLIVM_DEBUG_DUMP_BC)) {
    681       int n = MAttrs.size();
    682       if (n > 0) {
    683          debug_printf("llc -mattr option(s): ");
    684          for (int i = 0; i < n; i++)
    685             debug_printf("%s%s", MAttrs[i].c_str(), (i < n - 1) ? "," : "");
    686          debug_printf("\n");
    687       }
    688    }
    689 
    690 #if HAVE_LLVM >= 0x0305
    691    StringRef MCPU = llvm::sys::getHostCPUName();
    692    /*
    693     * The cpu bits are no longer set automatically, so need to set mcpu manually.
    694     * Note that the MAttrs set above will be sort of ignored (since we should
    695     * not set any which would not be set by specifying the cpu anyway).
    696     * It ought to be safe though since getHostCPUName() should include bits
    697     * not only from the cpu but environment as well (for instance if it's safe
    698     * to use avx instructions which need OS support). According to
    699     * http://llvm.org/bugs/show_bug.cgi?id=19429 however if I understand this
    700     * right it may be necessary to specify older cpu (or disable mattrs) though
    701     * when not using MCJIT so no instructions are generated which the old JIT
    702     * can't handle. Not entirely sure if we really need to do anything yet.
    703     */
    704 #if defined(PIPE_ARCH_LITTLE_ENDIAN)  && defined(PIPE_ARCH_PPC_64)
    705    /*
    706     * Versions of LLVM prior to 4.0 lacked a table entry for "POWER8NVL",
    707     * resulting in (big-endian) "generic" being returned on
    708     * little-endian Power8NVL systems.  The result was that code that
    709     * attempted to load the least significant 32 bits of a 64-bit quantity
    710     * from memory loaded the wrong half.  This resulted in failures in some
    711     * Piglit tests, e.g.
    712     * .../arb_gpu_shader_fp64/execution/conversion/frag-conversion-explicit-double-uint
    713     */
    714    if (MCPU == "generic")
    715       MCPU = "pwr8";
    716 #endif
    717    builder.setMCPU(MCPU);
    718    if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | GALLIVM_DEBUG_DUMP_BC)) {
    719       debug_printf("llc -mcpu option: %s\n", MCPU.str().c_str());
    720    }
    721 #endif
    722 
    723    ShaderMemoryManager *MM = NULL;
    724    if (useMCJIT) {
    725        BaseMemoryManager* JMM = reinterpret_cast<BaseMemoryManager*>(CMM);
    726        MM = new ShaderMemoryManager(JMM);
    727        *OutCode = MM->getGeneratedCode();
    728 
    729 #if HAVE_LLVM >= 0x0306
    730        builder.setMCJITMemoryManager(std::unique_ptr<RTDyldMemoryManager>(MM));
    731        MM = NULL; // ownership taken by std::unique_ptr
    732 #elif HAVE_LLVM > 0x0303
    733        builder.setMCJITMemoryManager(MM);
    734 #else
    735        builder.setJITMemoryManager(MM);
    736 #endif
    737    } else {
    738 #if HAVE_LLVM < 0x0306
    739        BaseMemoryManager* JMM = reinterpret_cast<BaseMemoryManager*>(CMM);
    740        MM = new ShaderMemoryManager(JMM);
    741        *OutCode = MM->getGeneratedCode();
    742 
    743        builder.setJITMemoryManager(MM);
    744 #else
    745        assert(0);
    746 #endif
    747    }
    748 
    749    ExecutionEngine *JIT;
    750 
    751    JIT = builder.create();
    752 #if LLVM_USE_INTEL_JITEVENTS
    753    JITEventListener *JEL = JITEventListener::createIntelJITEventListener();
    754    JIT->RegisterJITEventListener(JEL);
    755 #endif
    756    if (JIT) {
    757       *OutJIT = wrap(JIT);
    758       return 0;
    759    }
    760    lp_free_generated_code(*OutCode);
    761    *OutCode = 0;
    762    delete MM;
    763    *OutError = strdup(Error.c_str());
    764    return 1;
    765 }
    766 
    767 
    768 extern "C"
    769 void
    770 lp_free_generated_code(struct lp_generated_code *code)
    771 {
    772    ShaderMemoryManager::freeGeneratedCode(code);
    773 }
    774 
    775 extern "C"
    776 LLVMMCJITMemoryManagerRef
    777 lp_get_default_memory_manager()
    778 {
    779    BaseMemoryManager *mm;
    780 #if HAVE_LLVM < 0x0306
    781    mm = llvm::JITMemoryManager::CreateDefaultMemManager();
    782 #else
    783    mm = new llvm::SectionMemoryManager();
    784 #endif
    785    return reinterpret_cast<LLVMMCJITMemoryManagerRef>(mm);
    786 }
    787 
    788 extern "C"
    789 void
    790 lp_free_memory_manager(LLVMMCJITMemoryManagerRef memorymgr)
    791 {
    792    delete reinterpret_cast<BaseMemoryManager*>(memorymgr);
    793 }
    794 
    795 extern "C" LLVMValueRef
    796 lp_get_called_value(LLVMValueRef call)
    797 {
    798 #if HAVE_LLVM >= 0x0309
    799 	return LLVMGetCalledValue(call);
    800 #elif HAVE_LLVM >= 0x0305
    801 	return llvm::wrap(llvm::CallSite(llvm::unwrap<llvm::Instruction>(call)).getCalledValue());
    802 #else
    803 	return NULL; /* radeonsi doesn't support so old LLVM. */
    804 #endif
    805 }
    806 
    807 extern "C" bool
    808 lp_is_function(LLVMValueRef v)
    809 {
    810 #if HAVE_LLVM >= 0x0309
    811 	return LLVMGetValueKind(v) == LLVMFunctionValueKind;
    812 #else
    813 	return llvm::isa<llvm::Function>(llvm::unwrap(v));
    814 #endif
    815 }
    816