Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2010 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
     18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     21  *
     22  * The above copyright notice and this permission notice (including the
     23  * next paragraph) shall be included in all copies or substantial portions
     24  * of the Software.
     25  *
     26  **************************************************************************/
     27 
     28 
     29 /**
 * The purpose of this module is to expose LLVM functionality that is only
 * available through the C++ API, i.e. not reachable through the C bindings.
     32  */
     33 
     34 
     35 #ifndef __STDC_LIMIT_MACROS
     36 #define __STDC_LIMIT_MACROS
     37 #endif
     38 
     39 #ifndef __STDC_CONSTANT_MACROS
     40 #define __STDC_CONSTANT_MACROS
     41 #endif
     42 
     43 // Undef these vars just to silence warnings
     44 #undef PACKAGE_BUGREPORT
     45 #undef PACKAGE_NAME
     46 #undef PACKAGE_STRING
     47 #undef PACKAGE_TARNAME
     48 #undef PACKAGE_VERSION
     49 
     50 
     51 #include <stddef.h>
     52 
     53 // Workaround http://llvm.org/PR23628
     54 #if HAVE_LLVM >= 0x0307
     55 #  pragma push_macro("DEBUG")
     56 #  undef DEBUG
     57 #endif
     58 
     59 #include <llvm-c/Core.h>
     60 #include <llvm-c/ExecutionEngine.h>
     61 #include <llvm/Target/TargetOptions.h>
     62 #include <llvm/ExecutionEngine/ExecutionEngine.h>
     63 #include <llvm/ADT/Triple.h>
     64 #if HAVE_LLVM >= 0x0307
     65 #include <llvm/Analysis/TargetLibraryInfo.h>
     66 #else
     67 #include <llvm/Target/TargetLibraryInfo.h>
     68 #endif
     69 #if HAVE_LLVM < 0x0306
     70 #include <llvm/ExecutionEngine/JITMemoryManager.h>
     71 #else
     72 #include <llvm/ExecutionEngine/SectionMemoryManager.h>
     73 #endif
     74 #include <llvm/Support/CommandLine.h>
     75 #include <llvm/Support/Host.h>
     76 #include <llvm/Support/PrettyStackTrace.h>
     77 
     78 #include <llvm/Support/TargetSelect.h>
     79 
     80 #if HAVE_LLVM >= 0x0305
     81 #include <llvm/IR/CallSite.h>
     82 #endif
     83 #include <llvm/IR/IRBuilder.h>
     84 #include <llvm/IR/Module.h>
     85 #include <llvm/Support/CBindingWrapping.h>
     86 
     87 #include <llvm/Config/llvm-config.h>
     88 #if LLVM_USE_INTEL_JITEVENTS
     89 #include <llvm/ExecutionEngine/JITEventListener.h>
     90 #endif
     91 
     92 // Workaround http://llvm.org/PR23628
     93 #if HAVE_LLVM >= 0x0307
     94 #  pragma pop_macro("DEBUG")
     95 #endif
     96 
     97 #include "c11/threads.h"
     98 #include "os/os_thread.h"
     99 #include "pipe/p_config.h"
    100 #include "util/u_debug.h"
    101 #include "util/u_cpu_detect.h"
    102 
    103 #include "lp_bld_misc.h"
    104 #include "lp_bld_debug.h"
    105 
    106 namespace {
    107 
    108 class LLVMEnsureMultithreaded {
    109 public:
    110    LLVMEnsureMultithreaded()
    111    {
    112       if (!LLVMIsMultithreaded()) {
    113          LLVMStartMultithreaded();
    114       }
    115    }
    116 };
    117 
    118 static LLVMEnsureMultithreaded lLVMEnsureMultithreaded;
    119 
    120 }
    121 
    122 static once_flag init_native_targets_once_flag = ONCE_FLAG_INIT;
    123 
    124 static void init_native_targets()
    125 {
    126    // If we have a native target, initialize it to ensure it is linked in and
    127    // usable by the JIT.
    128    llvm::InitializeNativeTarget();
    129 
    130    llvm::InitializeNativeTargetAsmPrinter();
    131 
    132    llvm::InitializeNativeTargetDisassembler();
    133 }
    134 
    135 /**
    136  * The llvm target registry is not thread-safe, so drivers and state-trackers
    137  * that want to initialize targets should use the gallivm_init_llvm_targets()
    138  * function to safely initialize targets.
    139  *
    140  * LLVM targets should be initialized before the driver or state-tracker tries
    141  * to access the registry.
    142  */
    143 extern "C" void
    144 gallivm_init_llvm_targets(void)
    145 {
    146    call_once(&init_native_targets_once_flag, init_native_targets);
    147 }
    148 
    149 extern "C" void
    150 lp_set_target_options(void)
    151 {
    152 #if HAVE_LLVM < 0x0304
    153    /*
    154     * By default LLVM adds a signal handler to output a pretty stack trace.
    155     * This signal handler is never removed, causing problems when unloading the
    156     * shared object where the gallium driver resides.
    157     */
    158    llvm::DisablePrettyStackTrace = true;
    159 #endif
    160 
    161    gallivm_init_llvm_targets();
    162 }
    163 
    164 extern "C"
    165 LLVMTargetLibraryInfoRef
    166 gallivm_create_target_library_info(const char *triple)
    167 {
    168    return reinterpret_cast<LLVMTargetLibraryInfoRef>(
    169 #if HAVE_LLVM < 0x0307
    170    new llvm::TargetLibraryInfo(
    171 #else
    172    new llvm::TargetLibraryInfoImpl(
    173 #endif
    174    llvm::Triple(triple)));
    175 }
    176 
    177 extern "C"
    178 void
    179 gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
    180 {
    181    delete reinterpret_cast<
    182 #if HAVE_LLVM < 0x0307
    183    llvm::TargetLibraryInfo
    184 #else
    185    llvm::TargetLibraryInfoImpl
    186 #endif
    187    *>(library_info);
    188 }
    189 
    190 
    191 #if HAVE_LLVM < 0x0304
    192 
    193 extern "C"
    194 void
    195 LLVMSetAlignmentBackport(LLVMValueRef V,
    196                          unsigned Bytes)
    197 {
    198    switch (LLVMGetInstructionOpcode(V)) {
    199    case LLVMLoad:
    200       llvm::unwrap<llvm::LoadInst>(V)->setAlignment(Bytes);
    201       break;
    202    case LLVMStore:
    203       llvm::unwrap<llvm::StoreInst>(V)->setAlignment(Bytes);
    204       break;
    205    default:
    206       assert(0);
    207       break;
    208    }
    209 }
    210 
    211 #endif
    212 
    213 
/*
 * Common base type for the memory managers defined in this file:
 * llvm::JITMemoryManager before LLVM 3.6, llvm::RTDyldMemoryManager from
 * LLVM 3.6 on.
 */
#if HAVE_LLVM < 0x0306
typedef llvm::JITMemoryManager BaseMemoryManager;
#else
typedef llvm::RTDyldMemoryManager BaseMemoryManager;
#endif
    219 
    220 
/*
 * Abstract memory manager that forwards every virtual method to the manager
 * returned by mgr(), which subclasses must provide.
 *
 * Delegating is tedious but the default manager class is hidden in an
 * anonymous namespace in LLVM, so we cannot just derive from it to change
 * its behavior.
 *
 * The set of methods and their signatures depend on the LLVM version, hence
 * the preprocessor conditionals throughout the class.
 */
class DelegatingJITMemoryManager : public BaseMemoryManager {

   protected:
      /* Returns the manager all calls are forwarded to. */
      virtual BaseMemoryManager *mgr() const = 0;

   public:
#if HAVE_LLVM < 0x0306
      /*
       * From JITMemoryManager
       */
      virtual void setMemoryWritable() {
         mgr()->setMemoryWritable();
      }
      virtual void setMemoryExecutable() {
         mgr()->setMemoryExecutable();
      }
      virtual void setPoisonMemory(bool poison) {
         mgr()->setPoisonMemory(poison);
      }
      virtual void AllocateGOT() {
         mgr()->AllocateGOT();
         /*
          * isManagingGOT() is not virtual in base class so we can't delegate.
          * Instead we mirror the value of HasGOT in our instance.
          */
         HasGOT = mgr()->isManagingGOT();
      }
      virtual uint8_t *getGOTBase() const {
         return mgr()->getGOTBase();
      }
      virtual uint8_t *startFunctionBody(const llvm::Function *F,
                                         uintptr_t &ActualSize) {
         return mgr()->startFunctionBody(F, ActualSize);
      }
      virtual uint8_t *allocateStub(const llvm::GlobalValue *F,
                                    unsigned StubSize,
                                    unsigned Alignment) {
         return mgr()->allocateStub(F, StubSize, Alignment);
      }
      virtual void endFunctionBody(const llvm::Function *F,
                                   uint8_t *FunctionStart,
                                   uint8_t *FunctionEnd) {
         mgr()->endFunctionBody(F, FunctionStart, FunctionEnd);
      }
      virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
         return mgr()->allocateSpace(Size, Alignment);
      }
      virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
         return mgr()->allocateGlobal(Size, Alignment);
      }
      virtual void deallocateFunctionBody(void *Body) {
         mgr()->deallocateFunctionBody(Body);
      }
#if HAVE_LLVM < 0x0304
      /* Exception table handling is only delegated for LLVM < 3.4. */
      virtual uint8_t *startExceptionTable(const llvm::Function *F,
                                           uintptr_t &ActualSize) {
         return mgr()->startExceptionTable(F, ActualSize);
      }
      virtual void endExceptionTable(const llvm::Function *F,
                                     uint8_t *TableStart,
                                     uint8_t *TableEnd,
                                     uint8_t *FrameRegister) {
         mgr()->endExceptionTable(F, TableStart, TableEnd,
                                  FrameRegister);
      }
      virtual void deallocateExceptionTable(void *ET) {
         mgr()->deallocateExceptionTable(ET);
      }
#endif
      virtual bool CheckInvariants(std::string &s) {
         return mgr()->CheckInvariants(s);
      }
      virtual size_t GetDefaultCodeSlabSize() {
         return mgr()->GetDefaultCodeSlabSize();
      }
      virtual size_t GetDefaultDataSlabSize() {
         return mgr()->GetDefaultDataSlabSize();
      }
      virtual size_t GetDefaultStubSlabSize() {
         return mgr()->GetDefaultStubSlabSize();
      }
      virtual unsigned GetNumCodeSlabs() {
         return mgr()->GetNumCodeSlabs();
      }
      virtual unsigned GetNumDataSlabs() {
         return mgr()->GetNumDataSlabs();
      }
      virtual unsigned GetNumStubSlabs() {
         return mgr()->GetNumStubSlabs();
      }
#endif

      /*
       * From RTDyldMemoryManager
       */
#if HAVE_LLVM >= 0x0304
      /* LLVM 3.4+ passes the section name in addition to the section ID. */
      virtual uint8_t *allocateCodeSection(uintptr_t Size,
                                           unsigned Alignment,
                                           unsigned SectionID,
                                           llvm::StringRef SectionName) {
         return mgr()->allocateCodeSection(Size, Alignment, SectionID,
                                           SectionName);
      }
#else
      virtual uint8_t *allocateCodeSection(uintptr_t Size,
                                           unsigned Alignment,
                                           unsigned SectionID) {
         return mgr()->allocateCodeSection(Size, Alignment, SectionID);
      }
#endif
      /* Same as above: the SectionName parameter only exists for LLVM 3.4+. */
      virtual uint8_t *allocateDataSection(uintptr_t Size,
                                           unsigned Alignment,
                                           unsigned SectionID,
#if HAVE_LLVM >= 0x0304
                                           llvm::StringRef SectionName,
#endif
                                           bool IsReadOnly) {
         return mgr()->allocateDataSection(Size, Alignment, SectionID,
#if HAVE_LLVM >= 0x0304
                                           SectionName,
#endif
                                           IsReadOnly);
      }
#if HAVE_LLVM >= 0x0304
      virtual void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
         mgr()->registerEHFrames(Addr, LoadAddr, Size);
      }
      virtual void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
         mgr()->deregisterEHFrames(Addr, LoadAddr, Size);
      }
#else
      virtual void registerEHFrames(llvm::StringRef SectionData) {
         mgr()->registerEHFrames(SectionData);
      }
#endif
      virtual void *getPointerToNamedFunction(const std::string &Name,
                                              bool AbortOnFailure=true) {
         return mgr()->getPointerToNamedFunction(Name, AbortOnFailure);
      }
#if HAVE_LLVM <= 0x0303
      /* Up to LLVM 3.3 the finalization hook is named applyPermissions(). */
      virtual bool applyPermissions(std::string *ErrMsg = 0) {
         return mgr()->applyPermissions(ErrMsg);
      }
#else
      virtual bool finalizeMemory(std::string *ErrMsg = 0) {
         return mgr()->finalizeMemory(ErrMsg);
      }
#endif
};
    375 
    376 
    377 /*
    378  * Delegate memory management to one shared manager for more efficient use
    379  * of memory than creating a separate pool for each LLVM engine.
    380  * Keep generated code until freeGeneratedCode() is called, instead of when
    381  * memory manager is destroyed, which happens during engine destruction.
    382  * This allows additional memory savings as we don't have to keep the engine
    383  * around in order to use the code.
    384  * All methods are delegated to the shared manager except destruction and
    385  * deallocating code.  For the latter we just remember what needs to be
    386  * deallocated later.  The shared manager is deleted once it is empty.
    387  */
    388 class ShaderMemoryManager : public DelegatingJITMemoryManager {
    389 
    390    BaseMemoryManager *TheMM;
    391 
    392    struct GeneratedCode {
    393       typedef std::vector<void *> Vec;
    394       Vec FunctionBody, ExceptionTable;
    395       BaseMemoryManager *TheMM;
    396 
    397       GeneratedCode(BaseMemoryManager *MM) {
    398          TheMM = MM;
    399       }
    400 
    401       ~GeneratedCode() {
    402          /*
    403           * Deallocate things as previously requested and
    404           * free shared manager when no longer used.
    405           */
    406 #if HAVE_LLVM < 0x0306
    407          Vec::iterator i;
    408 
    409          assert(TheMM);
    410          for ( i = FunctionBody.begin(); i != FunctionBody.end(); ++i )
    411             TheMM->deallocateFunctionBody(*i);
    412 #if HAVE_LLVM < 0x0304
    413          for ( i = ExceptionTable.begin(); i != ExceptionTable.end(); ++i )
    414             TheMM->deallocateExceptionTable(*i);
    415 #endif /* HAVE_LLVM < 0x0304 */
    416 #endif /* HAVE_LLVM < 0x0306 */
    417       }
    418    };
    419 
    420    GeneratedCode *code;
    421 
    422    BaseMemoryManager *mgr() const {
    423       return TheMM;
    424    }
    425 
    426    public:
    427 
    428       ShaderMemoryManager(BaseMemoryManager* MM) {
    429          TheMM = MM;
    430          code = new GeneratedCode(MM);
    431       }
    432 
    433       virtual ~ShaderMemoryManager() {
    434          /*
    435           * 'code' is purposely not deleted.  It is the user's responsibility
    436           * to call getGeneratedCode() and freeGeneratedCode().
    437           */
    438       }
    439 
    440       struct lp_generated_code *getGeneratedCode() {
    441          return (struct lp_generated_code *) code;
    442       }
    443 
    444       static void freeGeneratedCode(struct lp_generated_code *code) {
    445          delete (GeneratedCode *) code;
    446       }
    447 
    448 #if HAVE_LLVM < 0x0304
    449       virtual void deallocateExceptionTable(void *ET) {
    450          // remember for later deallocation
    451          code->ExceptionTable.push_back(ET);
    452       }
    453 #endif
    454 
    455       virtual void deallocateFunctionBody(void *Body) {
    456          // remember for later deallocation
    457          code->FunctionBody.push_back(Body);
    458       }
    459 };
    460 
    461 
    462 /**
    463  * Same as LLVMCreateJITCompilerForModule, but:
    464  * - allows using MCJIT and enabling AVX feature where available.
    465  * - set target options
    466  *
    467  * See also:
    468  * - llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
    469  * - llvm/tools/lli/lli.cpp
    470  * - http://markmail.org/message/ttkuhvgj4cxxy2on#query:+page:1+mid:aju2dggerju3ivd3+state:results
    471  */
    472 extern "C"
    473 LLVMBool
    474 lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
    475                                         lp_generated_code **OutCode,
    476                                         LLVMModuleRef M,
    477                                         LLVMMCJITMemoryManagerRef CMM,
    478                                         unsigned OptLevel,
    479                                         int useMCJIT,
    480                                         char **OutError)
    481 {
    482    using namespace llvm;
    483 
    484    std::string Error;
    485 #if HAVE_LLVM >= 0x0306
    486    EngineBuilder builder(std::unique_ptr<Module>(unwrap(M)));
    487 #else
    488    EngineBuilder builder(unwrap(M));
    489 #endif
    490 
    491    /**
    492     * LLVM 3.1+ haven't more "extern unsigned llvm::StackAlignmentOverride" and
    493     * friends for configuring code generation options, like stack alignment.
    494     */
    495    TargetOptions options;
    496 #if defined(PIPE_ARCH_X86)
    497    options.StackAlignmentOverride = 4;
    498 #if HAVE_LLVM < 0x0304
    499    options.RealignStack = true;
    500 #endif
    501 #endif
    502 
    503 #if defined(DEBUG) && HAVE_LLVM < 0x0307
    504    options.JITEmitDebugInfo = true;
    505 #endif
    506 
    507    /* XXX: Workaround http://llvm.org/PR21435 */
    508 #if defined(DEBUG) || defined(PROFILE) || \
    509     (HAVE_LLVM >= 0x0303 && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)))
    510 #if HAVE_LLVM < 0x0304
    511    options.NoFramePointerElimNonLeaf = true;
    512 #endif
    513 #if HAVE_LLVM < 0x0307
    514    options.NoFramePointerElim = true;
    515 #endif
    516 #endif
    517 
    518    builder.setEngineKind(EngineKind::JIT)
    519           .setErrorStr(&Error)
    520           .setTargetOptions(options)
    521           .setOptLevel((CodeGenOpt::Level)OptLevel);
    522 
    523    if (useMCJIT) {
    524 #if HAVE_LLVM < 0x0306
    525        builder.setUseMCJIT(true);
    526 #endif
    527 #ifdef _WIN32
    528        /*
    529         * MCJIT works on Windows, but currently only through ELF object format.
    530         *
    531         * XXX: We could use `LLVM_HOST_TRIPLE "-elf"` but LLVM_HOST_TRIPLE has
    532         * different strings for MinGW/MSVC, so better play it safe and be
    533         * explicit.
    534         */
    535 #  ifdef _WIN64
    536        LLVMSetTarget(M, "x86_64-pc-win32-elf");
    537 #  else
    538        LLVMSetTarget(M, "i686-pc-win32-elf");
    539 #  endif
    540 #endif
    541    }
    542 
    543    llvm::SmallVector<std::string, 16> MAttrs;
    544 
    545 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
    546 #if HAVE_LLVM >= 0x0400
    547    /* llvm-3.7+ implements sys::getHostCPUFeatures for x86,
    548     * which allows us to enable/disable code generation based
    549     * on the results of cpuid.
    550     */
    551    llvm::StringMap<bool> features;
    552    llvm::sys::getHostCPUFeatures(features);
    553 
    554    for (StringMapIterator<bool> f = features.begin();
    555         f != features.end();
    556         ++f) {
    557       MAttrs.push_back(((*f).second ? "+" : "-") + (*f).first().str());
    558    }
    559 #else
    560    /*
    561     * We need to unset attributes because sometimes LLVM mistakenly assumes
    562     * certain features are present given the processor name.
    563     *
    564     * https://bugs.freedesktop.org/show_bug.cgi?id=92214
    565     * http://llvm.org/PR25021
    566     * http://llvm.org/PR19429
    567     * http://llvm.org/PR16721
    568     */
    569    MAttrs.push_back(util_cpu_caps.has_sse    ? "+sse"    : "-sse"   );
    570    MAttrs.push_back(util_cpu_caps.has_sse2   ? "+sse2"   : "-sse2"  );
    571    MAttrs.push_back(util_cpu_caps.has_sse3   ? "+sse3"   : "-sse3"  );
    572    MAttrs.push_back(util_cpu_caps.has_ssse3  ? "+ssse3"  : "-ssse3" );
    573 #if HAVE_LLVM >= 0x0304
    574    MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse4.1" : "-sse4.1");
    575 #else
    576    MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse41"  : "-sse41" );
    577 #endif
    578 #if HAVE_LLVM >= 0x0304
    579    MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse4.2" : "-sse4.2");
    580 #else
    581    MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse42"  : "-sse42" );
    582 #endif
    583    /*
    584     * AVX feature is not automatically detected from CPUID by the X86 target
    585     * yet, because the old (yet default) JIT engine is not capable of
    586     * emitting the opcodes. On newer llvm versions it is and at least some
    587     * versions (tested with 3.3) will emit avx opcodes without this anyway.
    588     */
    589    MAttrs.push_back(util_cpu_caps.has_avx  ? "+avx"  : "-avx");
    590    MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c");
    591    if (HAVE_LLVM >= 0x0304) {
    592       MAttrs.push_back(util_cpu_caps.has_fma  ? "+fma"  : "-fma");
    593    } else {
    594       /*
    595        * The old JIT in LLVM 3.3 has a bug encoding llvm.fmuladd.f32 and
    596        * llvm.fmuladd.v2f32 intrinsics when FMA is available.
    597        */
    598       MAttrs.push_back("-fma");
    599    }
    600    MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2");
    601    /* disable avx512 and all subvariants */
    602 #if HAVE_LLVM >= 0x0304
    603    MAttrs.push_back("-avx512cd");
    604    MAttrs.push_back("-avx512er");
    605    MAttrs.push_back("-avx512f");
    606    MAttrs.push_back("-avx512pf");
    607 #endif
    608 #if HAVE_LLVM >= 0x0305
    609    MAttrs.push_back("-avx512bw");
    610    MAttrs.push_back("-avx512dq");
    611    MAttrs.push_back("-avx512vl");
    612 #endif
    613 #endif
    614 #endif
    615 
    616 #if defined(PIPE_ARCH_PPC)
    617    MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
    618 #if (HAVE_LLVM >= 0x0304)
    619 #if (HAVE_LLVM <= 0x0307) || (HAVE_LLVM == 0x0308 && MESA_LLVM_VERSION_PATCH == 0)
    620    /*
    621     * Make sure VSX instructions are disabled
    622     * See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7
    623     */
    624    if (util_cpu_caps.has_altivec) {
    625       MAttrs.push_back("-vsx");
    626    }
    627 #else
    628    /*
    629     * However, bug 25503 is fixed, by the same fix that fixed
    630     * bug 26775, in versions of LLVM later than 3.8 (starting with 3.8.1):
    631     * Make sure VSX instructions are ENABLED
    632     * See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=26775
    633     */
    634    if (util_cpu_caps.has_altivec) {
    635       MAttrs.push_back("+vsx");
    636    }
    637 #endif
    638 #endif
    639 #endif
    640 
    641    builder.setMAttrs(MAttrs);
    642 
    643    if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | GALLIVM_DEBUG_DUMP_BC)) {
    644       int n = MAttrs.size();
    645       if (n > 0) {
    646          debug_printf("llc -mattr option(s): ");
    647          for (int i = 0; i < n; i++)
    648             debug_printf("%s%s", MAttrs[i].c_str(), (i < n - 1) ? "," : "");
    649          debug_printf("\n");
    650       }
    651    }
    652 
    653 #if HAVE_LLVM >= 0x0305
    654    StringRef MCPU = llvm::sys::getHostCPUName();
    655    /*
    656     * The cpu bits are no longer set automatically, so need to set mcpu manually.
    657     * Note that the MAttrs set above will be sort of ignored (since we should
    658     * not set any which would not be set by specifying the cpu anyway).
    659     * It ought to be safe though since getHostCPUName() should include bits
    660     * not only from the cpu but environment as well (for instance if it's safe
    661     * to use avx instructions which need OS support). According to
    662     * http://llvm.org/bugs/show_bug.cgi?id=19429 however if I understand this
    663     * right it may be necessary to specify older cpu (or disable mattrs) though
    664     * when not using MCJIT so no instructions are generated which the old JIT
    665     * can't handle. Not entirely sure if we really need to do anything yet.
    666     */
    667 #if defined(PIPE_ARCH_LITTLE_ENDIAN)  && defined(PIPE_ARCH_PPC_64)
    668    /*
    669     * Versions of LLVM prior to 4.0 lacked a table entry for "POWER8NVL",
    670     * resulting in (big-endian) "generic" being returned on
    671     * little-endian Power8NVL systems.  The result was that code that
    672     * attempted to load the least significant 32 bits of a 64-bit quantity
    673     * from memory loaded the wrong half.  This resulted in failures in some
    674     * Piglit tests, e.g.
    675     * .../arb_gpu_shader_fp64/execution/conversion/frag-conversion-explicit-double-uint
    676     */
    677    if (MCPU == "generic")
    678       MCPU = "pwr8";
    679 #endif
    680    builder.setMCPU(MCPU);
    681    if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | GALLIVM_DEBUG_DUMP_BC)) {
    682       debug_printf("llc -mcpu option: %s\n", MCPU.str().c_str());
    683    }
    684 #endif
    685 
    686    ShaderMemoryManager *MM = NULL;
    687    if (useMCJIT) {
    688        BaseMemoryManager* JMM = reinterpret_cast<BaseMemoryManager*>(CMM);
    689        MM = new ShaderMemoryManager(JMM);
    690        *OutCode = MM->getGeneratedCode();
    691 
    692 #if HAVE_LLVM >= 0x0306
    693        builder.setMCJITMemoryManager(std::unique_ptr<RTDyldMemoryManager>(MM));
    694        MM = NULL; // ownership taken by std::unique_ptr
    695 #elif HAVE_LLVM > 0x0303
    696        builder.setMCJITMemoryManager(MM);
    697 #else
    698        builder.setJITMemoryManager(MM);
    699 #endif
    700    } else {
    701 #if HAVE_LLVM < 0x0306
    702        BaseMemoryManager* JMM = reinterpret_cast<BaseMemoryManager*>(CMM);
    703        MM = new ShaderMemoryManager(JMM);
    704        *OutCode = MM->getGeneratedCode();
    705 
    706        builder.setJITMemoryManager(MM);
    707 #else
    708        assert(0);
    709 #endif
    710    }
    711 
    712    ExecutionEngine *JIT;
    713 
    714    JIT = builder.create();
    715 #if LLVM_USE_INTEL_JITEVENTS
    716    JITEventListener *JEL = JITEventListener::createIntelJITEventListener();
    717    JIT->RegisterJITEventListener(JEL);
    718 #endif
    719    if (JIT) {
    720       *OutJIT = wrap(JIT);
    721       return 0;
    722    }
    723    lp_free_generated_code(*OutCode);
    724    *OutCode = 0;
    725    delete MM;
    726    *OutError = strdup(Error.c_str());
    727    return 1;
    728 }
    729 
    730 
    731 extern "C"
    732 void
    733 lp_free_generated_code(struct lp_generated_code *code)
    734 {
    735    ShaderMemoryManager::freeGeneratedCode(code);
    736 }
    737 
    738 extern "C"
    739 LLVMMCJITMemoryManagerRef
    740 lp_get_default_memory_manager()
    741 {
    742    BaseMemoryManager *mm;
    743 #if HAVE_LLVM < 0x0306
    744    mm = llvm::JITMemoryManager::CreateDefaultMemManager();
    745 #else
    746    mm = new llvm::SectionMemoryManager();
    747 #endif
    748    return reinterpret_cast<LLVMMCJITMemoryManagerRef>(mm);
    749 }
    750 
    751 extern "C"
    752 void
    753 lp_free_memory_manager(LLVMMCJITMemoryManagerRef memorymgr)
    754 {
    755    delete reinterpret_cast<BaseMemoryManager*>(memorymgr);
    756 }
    757 
    758 extern "C" void
    759 lp_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
    760 {
    761 #if HAVE_LLVM >= 0x0306
    762    llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
    763    llvm::AttrBuilder B;
    764    B.addDereferenceableAttr(bytes);
    765    A->addAttr(llvm::AttributeSet::get(A->getContext(), A->getArgNo() + 1,  B));
    766 #endif
    767 }
    768 
    769 extern "C" LLVMValueRef
    770 lp_get_called_value(LLVMValueRef call)
    771 {
    772 #if HAVE_LLVM >= 0x0309
    773 	return LLVMGetCalledValue(call);
    774 #elif HAVE_LLVM >= 0x0305
    775 	return llvm::wrap(llvm::CallSite(llvm::unwrap<llvm::Instruction>(call)).getCalledValue());
    776 #else
    777 	return NULL; /* radeonsi doesn't support so old LLVM. */
    778 #endif
    779 }
    780 
    781 extern "C" bool
    782 lp_is_function(LLVMValueRef v)
    783 {
    784 #if HAVE_LLVM >= 0x0309
    785 	return LLVMGetValueKind(v) == LLVMFunctionValueKind;
    786 #else
    787 	return llvm::isa<llvm::Function>(llvm::unwrap(v));
    788 #endif
    789 }
    790 
    791 extern "C" LLVMBuilderRef
    792 lp_create_builder(LLVMContextRef ctx, bool unsafe_fpmath)
    793 {
    794    LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
    795 
    796 #if HAVE_LLVM >= 0x0308
    797    if (unsafe_fpmath) {
    798       llvm::FastMathFlags flags;
    799       flags.setUnsafeAlgebra();
    800       llvm::unwrap(builder)->setFastMathFlags(flags);
    801    }
    802 #endif
    803 
    804    return builder;
    805 }
    806