      1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the auto-upgrade helper functions.
     11 // This is where deprecated IR intrinsics and other IR features are updated to
     12 // current specifications.
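// For example, a call to a retired intrinsic such as llvm.x86.sse2.pcmpeq.d is
// rewritten into equivalent generic IR (an icmp eq followed by a sext).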
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "llvm/IR/AutoUpgrade.h"
     17 #include "llvm/ADT/StringSwitch.h"
     18 #include "llvm/IR/Constants.h"
     19 #include "llvm/IR/DIBuilder.h"
     20 #include "llvm/IR/DebugInfo.h"
     21 #include "llvm/IR/DiagnosticInfo.h"
     22 #include "llvm/IR/Function.h"
     23 #include "llvm/IR/IRBuilder.h"
     24 #include "llvm/IR/Instruction.h"
     25 #include "llvm/IR/IntrinsicInst.h"
     26 #include "llvm/IR/LLVMContext.h"
     27 #include "llvm/IR/Module.h"
     28 #include "llvm/IR/Verifier.h"
     29 #include "llvm/Support/ErrorHandling.h"
     30 #include "llvm/Support/Regex.h"
     31 #include <cstring>
     32 using namespace llvm;
     33 
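// Give the old declaration a ".old" suffix so that a new declaration can be
// created under the canonical intrinsic name; calls to the renamed function are
// then rewritten against the new declaration by the call upgrade code.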
     34 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
     35 
     36 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
     37 // changed their type from v4f32 to v2i64.
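// For example, the declaration of llvm.x86.sse41.ptestc changes from
// (v4f32, v4f32) to (v2i64, v2i64); the call-site arguments are bitcast to the
// new type when the call itself is upgraded.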
     38 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
     39                                   Function *&NewFn) {
     40   // Check whether this is an old version of the function, which received
     41   // v4f32 arguments.
     42   Type *Arg0Type = F->getFunctionType()->getParamType(0);
     43   if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
     44     return false;
     45 
      46   // Yes, it's old; replace it with the new version.
     47   rename(F);
     48   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
     49   return true;
     50 }
     51 
     52 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
     53 // arguments have changed their type from i32 to i8.
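// For example, llvm.x86.sse41.insertps used to take an i32 immediate; the
// upgraded declaration takes an i8, and the call upgrade truncates the operand.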
     54 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
     55                                              Function *&NewFn) {
     56   // Check that the last argument is an i32.
     57   Type *LastArgType = F->getFunctionType()->getParamType(
     58      F->getFunctionType()->getNumParams() - 1);
     59   if (!LastArgType->isIntegerTy(32))
     60     return false;
     61 
      62   // Move the old declaration aside so the new one can take its place.
     63   rename(F);
     64   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
     65   return true;
     66 }
     67 
     68 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
      69   // All of the intrinsic matches below should be marked with the LLVM
      70   // version that started auto-upgrading them. At some point in the future we
      71   // would like to use this information to remove upgrade code for some older
      72   // intrinsics. It is currently undecided how we will determine that future
      73   // point.
     74   if (Name=="ssse3.pabs.b.128" || // Added in 6.0
     75       Name=="ssse3.pabs.w.128" || // Added in 6.0
     76       Name=="ssse3.pabs.d.128" || // Added in 6.0
     77       Name.startswith("fma4.vfmadd.s") || // Added in 7.0
     78       Name.startswith("fma.vfmadd.") || // Added in 7.0
     79       Name.startswith("fma.vfmsub.") || // Added in 7.0
     80       Name.startswith("fma.vfmaddsub.") || // Added in 7.0
     81       Name.startswith("fma.vfmsubadd.") || // Added in 7.0
     82       Name.startswith("fma.vfnmadd.") || // Added in 7.0
     83       Name.startswith("fma.vfnmsub.") || // Added in 7.0
     84       Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
     85       Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
     86       Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
     87       Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
     88       Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
     89       Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
     90       Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
     91       Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
     92       Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
     93       Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
     94       Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
     95       Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
     96       Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
     97       Name.startswith("avx512.kunpck") || //added in 6.0
     98       Name.startswith("avx2.pabs.") || // Added in 6.0
     99       Name.startswith("avx512.mask.pabs.") || // Added in 6.0
    100       Name.startswith("avx512.broadcastm") || // Added in 6.0
    101       Name == "sse.sqrt.ss" || // Added in 7.0
    102       Name == "sse2.sqrt.sd" || // Added in 7.0
    103       Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
    104       Name.startswith("avx.sqrt.p") || // Added in 7.0
    105       Name.startswith("sse2.sqrt.p") || // Added in 7.0
    106       Name.startswith("sse.sqrt.p") || // Added in 7.0
    107       Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
    108       Name.startswith("sse2.pcmpeq.") || // Added in 3.1
    109       Name.startswith("sse2.pcmpgt.") || // Added in 3.1
    110       Name.startswith("avx2.pcmpeq.") || // Added in 3.1
    111       Name.startswith("avx2.pcmpgt.") || // Added in 3.1
    112       Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
    113       Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
    114       Name.startswith("avx.vperm2f128.") || // Added in 6.0
    115       Name == "avx2.vperm2i128" || // Added in 6.0
    116       Name == "sse.add.ss" || // Added in 4.0
    117       Name == "sse2.add.sd" || // Added in 4.0
    118       Name == "sse.sub.ss" || // Added in 4.0
    119       Name == "sse2.sub.sd" || // Added in 4.0
    120       Name == "sse.mul.ss" || // Added in 4.0
    121       Name == "sse2.mul.sd" || // Added in 4.0
    122       Name == "sse.div.ss" || // Added in 4.0
    123       Name == "sse2.div.sd" || // Added in 4.0
    124       Name == "sse41.pmaxsb" || // Added in 3.9
    125       Name == "sse2.pmaxs.w" || // Added in 3.9
    126       Name == "sse41.pmaxsd" || // Added in 3.9
    127       Name == "sse2.pmaxu.b" || // Added in 3.9
    128       Name == "sse41.pmaxuw" || // Added in 3.9
    129       Name == "sse41.pmaxud" || // Added in 3.9
    130       Name == "sse41.pminsb" || // Added in 3.9
    131       Name == "sse2.pmins.w" || // Added in 3.9
    132       Name == "sse41.pminsd" || // Added in 3.9
    133       Name == "sse2.pminu.b" || // Added in 3.9
    134       Name == "sse41.pminuw" || // Added in 3.9
    135       Name == "sse41.pminud" || // Added in 3.9
    136       Name == "avx512.kand.w" || // Added in 7.0
    137       Name == "avx512.kandn.w" || // Added in 7.0
    138       Name == "avx512.knot.w" || // Added in 7.0
    139       Name == "avx512.kor.w" || // Added in 7.0
    140       Name == "avx512.kxor.w" || // Added in 7.0
    141       Name == "avx512.kxnor.w" || // Added in 7.0
    142       Name == "avx512.kortestc.w" || // Added in 7.0
    143       Name == "avx512.kortestz.w" || // Added in 7.0
    144       Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
    145       Name.startswith("avx2.pmax") || // Added in 3.9
    146       Name.startswith("avx2.pmin") || // Added in 3.9
    147       Name.startswith("avx512.mask.pmax") || // Added in 4.0
    148       Name.startswith("avx512.mask.pmin") || // Added in 4.0
    149       Name.startswith("avx2.vbroadcast") || // Added in 3.8
    150       Name.startswith("avx2.pbroadcast") || // Added in 3.8
    151       Name.startswith("avx.vpermil.") || // Added in 3.1
    152       Name.startswith("sse2.pshuf") || // Added in 3.9
    153       Name.startswith("avx512.pbroadcast") || // Added in 3.9
    154       Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
    155       Name.startswith("avx512.mask.movddup") || // Added in 3.9
    156       Name.startswith("avx512.mask.movshdup") || // Added in 3.9
    157       Name.startswith("avx512.mask.movsldup") || // Added in 3.9
    158       Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
    159       Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
    160       Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
    161       Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
    162       Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
    163       Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
    164       Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
    165       Name.startswith("avx512.mask.punpckl") || // Added in 3.9
    166       Name.startswith("avx512.mask.punpckh") || // Added in 3.9
    167       Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
    168       Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
    169       Name.startswith("avx512.mask.pand.") || // Added in 3.9
    170       Name.startswith("avx512.mask.pandn.") || // Added in 3.9
    171       Name.startswith("avx512.mask.por.") || // Added in 3.9
    172       Name.startswith("avx512.mask.pxor.") || // Added in 3.9
    173       Name.startswith("avx512.mask.and.") || // Added in 3.9
    174       Name.startswith("avx512.mask.andn.") || // Added in 3.9
    175       Name.startswith("avx512.mask.or.") || // Added in 3.9
    176       Name.startswith("avx512.mask.xor.") || // Added in 3.9
    177       Name.startswith("avx512.mask.padd.") || // Added in 4.0
    178       Name.startswith("avx512.mask.psub.") || // Added in 4.0
    179       Name.startswith("avx512.mask.pmull.") || // Added in 4.0
    180       Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
    181       Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
    182       Name == "avx512.mask.cvtudq2ps.128" || // Added in 7.0
    183       Name == "avx512.mask.cvtudq2ps.256" || // Added in 7.0
    184       Name == "avx512.mask.cvtqq2pd.128" || // Added in 7.0
    185       Name == "avx512.mask.cvtqq2pd.256" || // Added in 7.0
    186       Name == "avx512.mask.cvtuqq2pd.128" || // Added in 7.0
    187       Name == "avx512.mask.cvtuqq2pd.256" || // Added in 7.0
    188       Name == "avx512.mask.cvtdq2ps.128" || // Added in 7.0
    189       Name == "avx512.mask.cvtdq2ps.256" || // Added in 7.0
    190       Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
    191       Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
    192       Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
    193       Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
    194       Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
    195       Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
    196       Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
    197       Name == "avx512.cvtusi2sd" || // Added in 7.0
    198       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
    199       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
    200       Name == "sse2.pmulu.dq" || // Added in 7.0
    201       Name == "sse41.pmuldq" || // Added in 7.0
    202       Name == "avx2.pmulu.dq" || // Added in 7.0
    203       Name == "avx2.pmul.dq" || // Added in 7.0
    204       Name == "avx512.pmulu.dq.512" || // Added in 7.0
    205       Name == "avx512.pmul.dq.512" || // Added in 7.0
    206       Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
    207       Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
    208       Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
    209       Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
    210       Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
    211       Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
    212       Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
    213       Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
    214       Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
    215       Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
    216       Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
    217       Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
    218       Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
    219       Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
    220       Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
    221       Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
    222       Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
    223       Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
    224       Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
    225       Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
    226       Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
    227       Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
    228       Name.startswith("avx512.mask.psll.d") || // Added in 4.0
    229       Name.startswith("avx512.mask.psll.q") || // Added in 4.0
    230       Name.startswith("avx512.mask.psll.w") || // Added in 4.0
    231       Name.startswith("avx512.mask.psra.d") || // Added in 4.0
    232       Name.startswith("avx512.mask.psra.q") || // Added in 4.0
    233       Name.startswith("avx512.mask.psra.w") || // Added in 4.0
    234       Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
    235       Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
    236       Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
    237       Name.startswith("avx512.mask.pslli") || // Added in 4.0
    238       Name.startswith("avx512.mask.psrai") || // Added in 4.0
    239       Name.startswith("avx512.mask.psrli") || // Added in 4.0
    240       Name.startswith("avx512.mask.psllv") || // Added in 4.0
    241       Name.startswith("avx512.mask.psrav") || // Added in 4.0
    242       Name.startswith("avx512.mask.psrlv") || // Added in 4.0
    243       Name.startswith("sse41.pmovsx") || // Added in 3.8
    244       Name.startswith("sse41.pmovzx") || // Added in 3.9
    245       Name.startswith("avx2.pmovsx") || // Added in 3.9
    246       Name.startswith("avx2.pmovzx") || // Added in 3.9
    247       Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
    248       Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
    249       Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
    250       Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
    251       Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
    252       Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
    253       Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
    254       Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
    255       Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
    256       Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
    257       Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
    258       Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
    259       Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
    260       Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
    261       Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
    262       Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
    263       Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
    264       Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
    265       Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
    266       Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
    267       Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
    268       Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
    269       Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
    270       Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
    271       Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
    272       Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
    273       Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
    274       Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
    275       Name.startswith("avx512.mask.prorv.") || // Added in 7.0
    276       Name.startswith("avx512.mask.pror.") || // Added in 7.0
    277       Name.startswith("avx512.mask.prolv.") || // Added in 7.0
    278       Name.startswith("avx512.mask.prol.") || // Added in 7.0
    279       Name == "sse.cvtsi2ss" || // Added in 7.0
    280       Name == "sse.cvtsi642ss" || // Added in 7.0
    281       Name == "sse2.cvtsi2sd" || // Added in 7.0
    282       Name == "sse2.cvtsi642sd" || // Added in 7.0
    283       Name == "sse2.cvtss2sd" || // Added in 7.0
    284       Name == "sse2.cvtdq2pd" || // Added in 3.9
    285       Name == "sse2.cvtdq2ps" || // Added in 7.0
    286       Name == "sse2.cvtps2pd" || // Added in 3.9
    287       Name == "avx.cvtdq2.pd.256" || // Added in 3.9
    288       Name == "avx.cvtdq2.ps.256" || // Added in 7.0
    289       Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
    290       Name.startswith("avx.vinsertf128.") || // Added in 3.7
    291       Name == "avx2.vinserti128" || // Added in 3.7
    292       Name.startswith("avx512.mask.insert") || // Added in 4.0
    293       Name.startswith("avx.vextractf128.") || // Added in 3.7
    294       Name == "avx2.vextracti128" || // Added in 3.7
    295       Name.startswith("avx512.mask.vextract") || // Added in 4.0
    296       Name.startswith("sse4a.movnt.") || // Added in 3.9
    297       Name.startswith("avx.movnt.") || // Added in 3.2
    298       Name.startswith("avx512.storent.") || // Added in 3.9
    299       Name == "sse41.movntdqa" || // Added in 5.0
    300       Name == "avx2.movntdqa" || // Added in 5.0
    301       Name == "avx512.movntdqa" || // Added in 5.0
    302       Name == "sse2.storel.dq" || // Added in 3.9
    303       Name.startswith("sse.storeu.") || // Added in 3.9
    304       Name.startswith("sse2.storeu.") || // Added in 3.9
    305       Name.startswith("avx.storeu.") || // Added in 3.9
    306       Name.startswith("avx512.mask.storeu.") || // Added in 3.9
    307       Name.startswith("avx512.mask.store.p") || // Added in 3.9
    308       Name.startswith("avx512.mask.store.b.") || // Added in 3.9
    309       Name.startswith("avx512.mask.store.w.") || // Added in 3.9
    310       Name.startswith("avx512.mask.store.d.") || // Added in 3.9
    311       Name.startswith("avx512.mask.store.q.") || // Added in 3.9
    312       Name == "avx512.mask.store.ss" || // Added in 7.0
    313       Name.startswith("avx512.mask.loadu.") || // Added in 3.9
    314       Name.startswith("avx512.mask.load.") || // Added in 3.9
    315       Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
    316       Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
    317       Name == "sse42.crc32.64.8" || // Added in 3.4
    318       Name.startswith("avx.vbroadcast.s") || // Added in 3.5
    319       Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
    320       Name.startswith("avx512.mask.palignr.") || // Added in 3.9
    321       Name.startswith("avx512.mask.valign.") || // Added in 4.0
    322       Name.startswith("sse2.psll.dq") || // Added in 3.7
    323       Name.startswith("sse2.psrl.dq") || // Added in 3.7
    324       Name.startswith("avx2.psll.dq") || // Added in 3.7
    325       Name.startswith("avx2.psrl.dq") || // Added in 3.7
    326       Name.startswith("avx512.psll.dq") || // Added in 3.9
    327       Name.startswith("avx512.psrl.dq") || // Added in 3.9
    328       Name == "sse41.pblendw" || // Added in 3.7
    329       Name.startswith("sse41.blendp") || // Added in 3.7
    330       Name.startswith("avx.blend.p") || // Added in 3.7
    331       Name == "avx2.pblendw" || // Added in 3.7
    332       Name.startswith("avx2.pblendd.") || // Added in 3.7
    333       Name.startswith("avx.vbroadcastf128") || // Added in 4.0
    334       Name == "avx2.vbroadcasti128" || // Added in 3.7
    335       Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
    336       Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
    337       Name == "xop.vpcmov" || // Added in 3.8
    338       Name == "xop.vpcmov.256" || // Added in 5.0
    339       Name.startswith("avx512.mask.move.s") || // Added in 4.0
    340       Name.startswith("avx512.cvtmask2") || // Added in 5.0
    341       (Name.startswith("xop.vpcom") && // Added in 3.2
    342        F->arg_size() == 2) ||
    343       Name.startswith("avx512.ptestm") || //Added in 6.0
    344       Name.startswith("avx512.ptestnm") || //Added in 6.0
    345       Name.startswith("sse2.pavg") || // Added in 6.0
    346       Name.startswith("avx2.pavg") || // Added in 6.0
    347       Name.startswith("avx512.mask.pavg")) // Added in 6.0
    348     return true;
    349 
    350   return false;
    351 }
    352 
    353 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
    354                                         Function *&NewFn) {
    355   // Only handle intrinsics that start with "x86.".
    356   if (!Name.startswith("x86."))
    357     return false;
    358   // Remove "x86." prefix.
    359   Name = Name.substr(4);
    360 
    361   if (ShouldUpgradeX86Intrinsic(F, Name)) {
    362     NewFn = nullptr;
    363     return true;
    364   }
    365 
    366   // SSE4.1 ptest functions may have an old signature.
    367   if (Name.startswith("sse41.ptest")) { // Added in 3.2
    368     if (Name.substr(11) == "c")
    369       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    370     if (Name.substr(11) == "z")
    371       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    372     if (Name.substr(11) == "nzc")
    373       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
    374   }
    375   // Several blend and other instructions with masks used the wrong number of
    376   // bits.
    377   if (Name == "sse41.insertps") // Added in 3.6
    378     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
    379                                             NewFn);
    380   if (Name == "sse41.dppd") // Added in 3.6
    381     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
    382                                             NewFn);
    383   if (Name == "sse41.dpps") // Added in 3.6
    384     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
    385                                             NewFn);
    386   if (Name == "sse41.mpsadbw") // Added in 3.6
    387     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
    388                                             NewFn);
    389   if (Name == "avx.dp.ps.256") // Added in 3.6
    390     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
    391                                             NewFn);
    392   if (Name == "avx2.mpsadbw") // Added in 3.6
    393     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
    394                                             NewFn);
    395 
    396   // frcz.ss/sd may need to have an argument dropped. Added in 3.2
    397   if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    398     rename(F);
    399     NewFn = Intrinsic::getDeclaration(F->getParent(),
    400                                       Intrinsic::x86_xop_vfrcz_ss);
    401     return true;
    402   }
    403   if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    404     rename(F);
    405     NewFn = Intrinsic::getDeclaration(F->getParent(),
    406                                       Intrinsic::x86_xop_vfrcz_sd);
    407     return true;
    408   }
    409   // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
    410   if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    411     auto Idx = F->getFunctionType()->getParamType(2);
    412     if (Idx->isFPOrFPVectorTy()) {
    413       rename(F);
    414       unsigned IdxSize = Idx->getPrimitiveSizeInBits();
    415       unsigned EltSize = Idx->getScalarSizeInBits();
    416       Intrinsic::ID Permil2ID;
    417       if (EltSize == 64 && IdxSize == 128)
    418         Permil2ID = Intrinsic::x86_xop_vpermil2pd;
    419       else if (EltSize == 32 && IdxSize == 128)
    420         Permil2ID = Intrinsic::x86_xop_vpermil2ps;
    421       else if (EltSize == 64 && IdxSize == 256)
    422         Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
    423       else
    424         Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
    425       NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
    426       return true;
    427     }
    428   }
    429 
    430   return false;
    431 }
    432 
    433 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
    434   assert(F && "Illegal to upgrade a non-existent Function.");
    435 
     436   // Quickly eliminate it if it's not a candidate.
    437   StringRef Name = F->getName();
    438   if (Name.size() <= 8 || !Name.startswith("llvm."))
    439     return false;
    440   Name = Name.substr(5); // Strip off "llvm."
    441 
    442   switch (Name[0]) {
    443   default: break;
    444   case 'a': {
    445     if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
    446       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
    447                                         F->arg_begin()->getType());
    448       return true;
    449     }
    450     if (Name.startswith("arm.neon.vclz")) {
    451       Type* args[2] = {
    452         F->arg_begin()->getType(),
    453         Type::getInt1Ty(F->getContext())
    454       };
    455       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
    456       // the end of the name. Change name from llvm.arm.neon.vclz.* to
    457       //  llvm.ctlz.*
    458       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
    459       NewFn = Function::Create(fType, F->getLinkage(),
    460                                "llvm.ctlz." + Name.substr(14), F->getParent());
    461       return true;
    462     }
    463     if (Name.startswith("arm.neon.vcnt")) {
    464       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
    465                                         F->arg_begin()->getType());
    466       return true;
    467     }
    468     Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    469     if (vldRegex.match(Name)) {
    470       auto fArgs = F->getFunctionType()->params();
    471       SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
    472       // Can't use Intrinsic::getDeclaration here as the return types might
    473       // then only be structurally equal.
    474       FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
    475       NewFn = Function::Create(fType, F->getLinkage(),
    476                                "llvm." + Name + ".p0i8", F->getParent());
    477       return true;
    478     }
    479     Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    480     if (vstRegex.match(Name)) {
    481       static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
    482                                                 Intrinsic::arm_neon_vst2,
    483                                                 Intrinsic::arm_neon_vst3,
    484                                                 Intrinsic::arm_neon_vst4};
    485 
    486       static const Intrinsic::ID StoreLaneInts[] = {
    487         Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
    488         Intrinsic::arm_neon_vst4lane
    489       };
    490 
    491       auto fArgs = F->getFunctionType()->params();
    492       Type *Tys[] = {fArgs[0], fArgs[1]};
    493       if (Name.find("lane") == StringRef::npos)
    494         NewFn = Intrinsic::getDeclaration(F->getParent(),
    495                                           StoreInts[fArgs.size() - 3], Tys);
    496       else
    497         NewFn = Intrinsic::getDeclaration(F->getParent(),
    498                                           StoreLaneInts[fArgs.size() - 5], Tys);
    499       return true;
    500     }
    501     if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
    502       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
    503       return true;
    504     }
    505     break;
    506   }
    507 
    508   case 'c': {
    509     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
    510       rename(F);
    511       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
    512                                         F->arg_begin()->getType());
    513       return true;
    514     }
    515     if (Name.startswith("cttz.") && F->arg_size() == 1) {
    516       rename(F);
    517       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
    518                                         F->arg_begin()->getType());
    519       return true;
    520     }
    521     break;
    522   }
    523   case 'd': {
    524     if (Name == "dbg.value" && F->arg_size() == 4) {
    525       rename(F);
    526       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
    527       return true;
    528     }
    529     break;
    530   }
    531   case 'i':
    532   case 'l': {
    533     bool IsLifetimeStart = Name.startswith("lifetime.start");
    534     if (IsLifetimeStart || Name.startswith("invariant.start")) {
    535       Intrinsic::ID ID = IsLifetimeStart ?
    536         Intrinsic::lifetime_start : Intrinsic::invariant_start;
    537       auto Args = F->getFunctionType()->params();
    538       Type* ObjectPtr[1] = {Args[1]};
    539       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
    540         rename(F);
    541         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
    542         return true;
    543       }
    544     }
    545 
    546     bool IsLifetimeEnd = Name.startswith("lifetime.end");
    547     if (IsLifetimeEnd || Name.startswith("invariant.end")) {
    548       Intrinsic::ID ID = IsLifetimeEnd ?
    549         Intrinsic::lifetime_end : Intrinsic::invariant_end;
    550 
    551       auto Args = F->getFunctionType()->params();
    552       Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
    553       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
    554         rename(F);
    555         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
    556         return true;
    557       }
    558     }
    559     if (Name.startswith("invariant.group.barrier")) {
    560       // Rename invariant.group.barrier to launder.invariant.group
    561       auto Args = F->getFunctionType()->params();
    562       Type* ObjectPtr[1] = {Args[0]};
    563       rename(F);
    564       NewFn = Intrinsic::getDeclaration(F->getParent(),
    565           Intrinsic::launder_invariant_group, ObjectPtr);
    566       return true;
    567 
    568     }
    569 
    570     break;
    571   }
    572   case 'm': {
    573     if (Name.startswith("masked.load.")) {
    574       Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
    575       if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
    576         rename(F);
    577         NewFn = Intrinsic::getDeclaration(F->getParent(),
    578                                           Intrinsic::masked_load,
    579                                           Tys);
    580         return true;
    581       }
    582     }
    583     if (Name.startswith("masked.store.")) {
    584       auto Args = F->getFunctionType()->params();
    585       Type *Tys[] = { Args[0], Args[1] };
    586       if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
    587         rename(F);
    588         NewFn = Intrinsic::getDeclaration(F->getParent(),
    589                                           Intrinsic::masked_store,
    590                                           Tys);
    591         return true;
    592       }
    593     }
     594     // Rename gather/scatter intrinsics that have no address space overloading
     595     // to the new overload, which includes an address space.
    596     if (Name.startswith("masked.gather.")) {
    597       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
    598       if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
    599         rename(F);
    600         NewFn = Intrinsic::getDeclaration(F->getParent(),
    601                                           Intrinsic::masked_gather, Tys);
    602         return true;
    603       }
    604     }
    605     if (Name.startswith("masked.scatter.")) {
    606       auto Args = F->getFunctionType()->params();
    607       Type *Tys[] = {Args[0], Args[1]};
    608       if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
    609         rename(F);
    610         NewFn = Intrinsic::getDeclaration(F->getParent(),
    611                                           Intrinsic::masked_scatter, Tys);
    612         return true;
    613       }
    614     }
     615     // Update the memory intrinsics (memcpy/memmove/memset) that take an explicit
     616     // alignment parameter so that the alignment is instead expressed as an
     617     // attribute on the pointer arguments.
    618     if (Name.startswith("memcpy.") && F->arg_size() == 5) {
    619       rename(F);
    620       // Get the types of dest, src, and len
    621       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
    622       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
    623                                         ParamTypes);
    624       return true;
    625     }
    626     if (Name.startswith("memmove.") && F->arg_size() == 5) {
    627       rename(F);
    628       // Get the types of dest, src, and len
    629       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
    630       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
    631                                         ParamTypes);
    632       return true;
    633     }
    634     if (Name.startswith("memset.") && F->arg_size() == 5) {
    635       rename(F);
    636       // Get the types of dest, and len
    637       const auto *FT = F->getFunctionType();
    638       Type *ParamTypes[2] = {
    639           FT->getParamType(0), // Dest
    640           FT->getParamType(2)  // len
    641       };
    642       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
    643                                         ParamTypes);
    644       return true;
    645     }
    646     break;
    647   }
    648   case 'n': {
    649     if (Name.startswith("nvvm.")) {
    650       Name = Name.substr(5);
    651 
    652       // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
    653       Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
    654                               .Cases("brev32", "brev64", Intrinsic::bitreverse)
    655                               .Case("clz.i", Intrinsic::ctlz)
    656                               .Case("popc.i", Intrinsic::ctpop)
    657                               .Default(Intrinsic::not_intrinsic);
    658       if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
    659         NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
    660                                           {F->getReturnType()});
    661         return true;
    662       }
    663 
    664       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
    665       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
    666       //
    667       // TODO: We could add lohi.i2d.
    668       bool Expand = StringSwitch<bool>(Name)
    669                         .Cases("abs.i", "abs.ll", true)
    670                         .Cases("clz.ll", "popc.ll", "h2f", true)
    671                         .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
    672                         .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
    673                         .Default(false);
    674       if (Expand) {
    675         NewFn = nullptr;
    676         return true;
    677       }
    678     }
    679     break;
    680   }
    681   case 'o':
    682     // We only need to change the name to match the mangling including the
    683     // address space.
    684     if (Name.startswith("objectsize.")) {
    685       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
    686       if (F->arg_size() == 2 ||
    687           F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
    688         rename(F);
    689         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
    690                                           Tys);
    691         return true;
    692       }
    693     }
    694     break;
    695 
    696   case 's':
    697     if (Name == "stackprotectorcheck") {
    698       NewFn = nullptr;
    699       return true;
    700     }
    701     break;
    702 
    703   case 'x':
    704     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
    705       return true;
    706   }
    707   // Remangle our intrinsic since we upgrade the mangling
    708   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
    709   if (Result != None) {
    710     NewFn = Result.getValue();
    711     return true;
    712   }
    713 
    714   //  This may not belong here. This function is effectively being overloaded
    715   //  to both detect an intrinsic which needs upgrading, and to provide the
    716   //  upgraded form of the intrinsic. We should perhaps have two separate
    717   //  functions for this.
    718   return false;
    719 }
    720 
    721 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
    722   NewFn = nullptr;
    723   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
    724   assert(F != NewFn && "Intrinsic function upgraded to the same function");
    725 
    726   // Upgrade intrinsic attributes.  This does not change the function.
    727   if (NewFn)
    728     F = NewFn;
    729   if (Intrinsic::ID id = F->getIntrinsicID())
    730     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
    731   return Upgraded;
    732 }
    733 
    734 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
    735   // Nothing to do yet.
    736   return false;
    737 }
    738 
    739 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
    740 // to byte shuffles.
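// For example, a 128-bit psll.dq by 3 bytes becomes:
//   shufflevector <16 x i8> zeroinitializer, <16 x i8> %op,
//                 <16 x i32> <i32 13, i32 14, i32 15, i32 16, ..., i32 28>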
    741 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
    742                                          Value *Op, unsigned Shift) {
    743   Type *ResultTy = Op->getType();
    744   unsigned NumElts = ResultTy->getVectorNumElements() * 8;
    745 
    746   // Bitcast from a 64-bit element type to a byte element type.
    747   Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
    748   Op = Builder.CreateBitCast(Op, VecTy, "cast");
    749 
    750   // We'll be shuffling in zeroes.
    751   Value *Res = Constant::getNullValue(VecTy);
    752 
    753   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
    754   // we'll just return the zero vector.
    755   if (Shift < 16) {
    756     uint32_t Idxs[64];
    757     // 256/512-bit version is split into 2/4 16-byte lanes.
    758     for (unsigned l = 0; l != NumElts; l += 16)
    759       for (unsigned i = 0; i != 16; ++i) {
    760         unsigned Idx = NumElts + i - Shift;
    761         if (Idx < NumElts)
    762           Idx -= NumElts - 16; // end of lane, switch operand.
    763         Idxs[l + i] = Idx + l;
    764       }
    765 
    766     Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
    767   }
    768 
    769   // Bitcast back to a 64-bit element type.
    770   return Builder.CreateBitCast(Res, ResultTy, "cast");
    771 }
    772 
    773 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
    774 // to byte shuffles.
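// For example, a 128-bit psrl.dq by 3 bytes becomes:
//   shufflevector <16 x i8> %op, <16 x i8> zeroinitializer,
//                 <16 x i32> <i32 3, i32 4, i32 5, ..., i32 17, i32 18>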
    775 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
    776                                          unsigned Shift) {
    777   Type *ResultTy = Op->getType();
    778   unsigned NumElts = ResultTy->getVectorNumElements() * 8;
    779 
    780   // Bitcast from a 64-bit element type to a byte element type.
    781   Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
    782   Op = Builder.CreateBitCast(Op, VecTy, "cast");
    783 
    784   // We'll be shuffling in zeroes.
    785   Value *Res = Constant::getNullValue(VecTy);
    786 
    787   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
    788   // we'll just return the zero vector.
    789   if (Shift < 16) {
    790     uint32_t Idxs[64];
    791     // 256/512-bit version is split into 2/4 16-byte lanes.
    792     for (unsigned l = 0; l != NumElts; l += 16)
    793       for (unsigned i = 0; i != 16; ++i) {
    794         unsigned Idx = i + Shift;
    795         if (Idx >= 16)
    796           Idx += NumElts - 16; // end of lane, switch operand.
    797         Idxs[l + i] = Idx + l;
    798       }
    799 
    800     Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
    801   }
    802 
    803   // Bitcast back to a 64-bit element type.
    804   return Builder.CreateBitCast(Res, ResultTy, "cast");
    805 }
    806 
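// Convert an integer mask argument (i8/i16/i32/i64) into a vector of i1, one
// bit per lane. If the operation has fewer than 8 lanes, the mask was an i8 and
// the excess bits are dropped with a shuffle.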
    807 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
    808                             unsigned NumElts) {
    809   llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
    810                              cast<IntegerType>(Mask->getType())->getBitWidth());
    811   Mask = Builder.CreateBitCast(Mask, MaskTy);
    812 
    813   // If we have less than 8 elements, then the starting mask was an i8 and
    814   // we need to extract down to the right number of elements.
    815   if (NumElts < 8) {
    816     uint32_t Indices[4];
    817     for (unsigned i = 0; i != NumElts; ++i)
    818       Indices[i] = i;
    819     Mask = Builder.CreateShuffleVector(Mask, Mask,
    820                                        makeArrayRef(Indices, NumElts),
    821                                        "extract");
    822   }
    823 
    824   return Mask;
    825 }
    826 
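// Apply an AVX-512 write-mask: lanes where the mask bit is set take the newly
// computed value Op0, all other lanes keep the passthru value Op1.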
    827 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
    828                             Value *Op0, Value *Op1) {
    829   // If the mask is all ones just emit the first operation.
    830   if (const auto *C = dyn_cast<Constant>(Mask))
    831     if (C->isAllOnesValue())
    832       return Op0;
    833 
    834   Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
    835   return Builder.CreateSelect(Mask, Op0, Op1);
    836 }
    837 
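// Scalar variant of EmitX86Select: only bit 0 of the mask is consulted.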
    838 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
    839                                   Value *Op0, Value *Op1) {
    840   // If the mask is all ones just emit the first operation.
    841   if (const auto *C = dyn_cast<Constant>(Mask))
    842     if (C->isAllOnesValue())
    843       return Op0;
    844 
    845   llvm::VectorType *MaskTy =
    846     llvm::VectorType::get(Builder.getInt1Ty(),
    847                           Mask->getType()->getIntegerBitWidth());
    848   Mask = Builder.CreateBitCast(Mask, MaskTy);
    849   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
    850   return Builder.CreateSelect(Mask, Op0, Op1);
    851 }
    852 
    853 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
    854 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
    855 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
    856 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
    857                                         Value *Op1, Value *Shift,
    858                                         Value *Passthru, Value *Mask,
    859                                         bool IsVALIGN) {
    860   unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
    861 
    862   unsigned NumElts = Op0->getType()->getVectorNumElements();
    863   assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
    864   assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
    865   assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
    866 
    867   // Mask the immediate for VALIGN.
    868   if (IsVALIGN)
    869     ShiftVal &= (NumElts - 1);
    870 
    871   // If palignr is shifting the pair of vectors more than the size of two
    872   // lanes, emit zero.
    873   if (ShiftVal >= 32)
    874     return llvm::Constant::getNullValue(Op0->getType());
    875 
    876   // If palignr is shifting the pair of input vectors more than one lane,
    877   // but less than two lanes, convert to shifting in zeroes.
    878   if (ShiftVal > 16) {
    879     ShiftVal -= 16;
    880     Op1 = Op0;
    881     Op0 = llvm::Constant::getNullValue(Op0->getType());
    882   }
    883 
    884   uint32_t Indices[64];
    885   // 256-bit palignr operates on 128-bit lanes so we need to handle that
    886   for (unsigned l = 0; l < NumElts; l += 16) {
    887     for (unsigned i = 0; i != 16; ++i) {
    888       unsigned Idx = ShiftVal + i;
    889       if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
    890         Idx += NumElts - 16; // End of lane, switch operand.
    891       Indices[l + i] = Idx + l;
    892     }
    893   }
    894 
    895   Value *Align = Builder.CreateShuffleVector(Op1, Op0,
    896                                              makeArrayRef(Indices, NumElts),
    897                                              "palignr");
    898 
    899   return EmitX86Select(Builder, Mask, Align, Passthru);
    900 }
    901 
    902 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
    903                                  Value *Ptr, Value *Data, Value *Mask,
    904                                  bool Aligned) {
    905   // Cast the pointer to the right type.
    906   Ptr = Builder.CreateBitCast(Ptr,
    907                               llvm::PointerType::getUnqual(Data->getType()));
    908   unsigned Align =
    909     Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
    910 
    911   // If the mask is all ones just emit a regular store.
    912   if (const auto *C = dyn_cast<Constant>(Mask))
    913     if (C->isAllOnesValue())
    914       return Builder.CreateAlignedStore(Data, Ptr, Align);
    915 
    916   // Convert the mask from an integer type to a vector of i1.
    917   unsigned NumElts = Data->getType()->getVectorNumElements();
    918   Mask = getX86MaskVec(Builder, Mask, NumElts);
    919   return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
    920 }
    921 
    922 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
    923                                 Value *Ptr, Value *Passthru, Value *Mask,
    924                                 bool Aligned) {
    925   // Cast the pointer to the right type.
    926   Ptr = Builder.CreateBitCast(Ptr,
    927                              llvm::PointerType::getUnqual(Passthru->getType()));
    928   unsigned Align =
    929     Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
    930 
     931   // If the mask is all ones just emit a regular load.
    932   if (const auto *C = dyn_cast<Constant>(Mask))
    933     if (C->isAllOnesValue())
    934       return Builder.CreateAlignedLoad(Ptr, Align);
    935 
    936   // Convert the mask from an integer type to a vector of i1.
    937   unsigned NumElts = Passthru->getType()->getVectorNumElements();
    938   Mask = getX86MaskVec(Builder, Mask, NumElts);
    939   return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
    940 }
    941 
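// Lower a pabs intrinsic to a compare-against-zero plus select
// (abs(x) = x > 0 ? x : -x), applying the AVX-512 mask if one is present.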
    942 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
    943   Value *Op0 = CI.getArgOperand(0);
    944   llvm::Type *Ty = Op0->getType();
    945   Value *Zero = llvm::Constant::getNullValue(Ty);
    946   Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
    947   Value *Neg = Builder.CreateNeg(Op0);
    948   Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
    949 
    950   if (CI.getNumArgOperands() == 3)
    951     Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
    952 
    953   return Res;
    954 }
    955 
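// Lower a packed integer min/max intrinsic to an icmp with the given predicate
// followed by a select, applying the AVX-512 mask if one is present.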
    956 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
    957                                ICmpInst::Predicate Pred) {
    958   Value *Op0 = CI.getArgOperand(0);
    959   Value *Op1 = CI.getArgOperand(1);
    960   Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
    961   Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
    962 
    963   if (CI.getNumArgOperands() == 4)
    964     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
    965 
    966   return Res;
    967 }
    968 
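// Lower pmuludq/pmuldq intrinsics to a plain 64-bit multiply: reinterpret the
// vXi32 operands as vXi64 and either sign-extend (shl+ashr) or zero-extend
// (mask) the low 32 bits of each element before multiplying.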
    969 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
    970   Type *Ty = CI.getType();
    971 
    972   // Arguments have a vXi32 type so cast to vXi64.
    973   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
    974   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
    975 
    976   if (IsSigned) {
    977     // Shift left then arithmetic shift right.
    978     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
    979     LHS = Builder.CreateShl(LHS, ShiftAmt);
    980     LHS = Builder.CreateAShr(LHS, ShiftAmt);
    981     RHS = Builder.CreateShl(RHS, ShiftAmt);
    982     RHS = Builder.CreateAShr(RHS, ShiftAmt);
    983   } else {
    984     // Clear the upper bits.
    985     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
    986     LHS = Builder.CreateAnd(LHS, Mask);
    987     RHS = Builder.CreateAnd(RHS, Mask);
    988   }
    989 
    990   Value *Res = Builder.CreateMul(LHS, RHS);
    991 
    992   if (CI.getNumArgOperands() == 4)
    993     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
    994 
    995   return Res;
    996 }
    997 
     998 // Apply a mask to a vector of i1's and make sure the result is at least 8 bits wide.
    999 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
   1000                                      Value *Mask) {
   1001   unsigned NumElts = Vec->getType()->getVectorNumElements();
   1002   if (Mask) {
   1003     const auto *C = dyn_cast<Constant>(Mask);
   1004     if (!C || !C->isAllOnesValue())
   1005       Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
   1006   }
   1007 
   1008   if (NumElts < 8) {
   1009     uint32_t Indices[8];
   1010     for (unsigned i = 0; i != NumElts; ++i)
   1011       Indices[i] = i;
   1012     for (unsigned i = NumElts; i != 8; ++i)
   1013       Indices[i] = NumElts + i % NumElts;
   1014     Vec = Builder.CreateShuffleVector(Vec,
   1015                                       Constant::getNullValue(Vec->getType()),
   1016                                       Indices);
   1017   }
   1018   return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
   1019 }
   1020 
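// Upgrade an avx512.mask.cmp/ucmp intrinsic: map the 3-bit condition code
// (0=eq, 1=lt, 2=le, 3=false, 4=ne, 5=ge, 6=gt, 7=true) to an icmp predicate
// and apply the mask to the resulting vector of i1.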
   1021 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
   1022                                    unsigned CC, bool Signed) {
   1023   Value *Op0 = CI.getArgOperand(0);
   1024   unsigned NumElts = Op0->getType()->getVectorNumElements();
   1025 
   1026   Value *Cmp;
   1027   if (CC == 3) {
   1028     Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
   1029   } else if (CC == 7) {
   1030     Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
   1031   } else {
   1032     ICmpInst::Predicate Pred;
   1033     switch (CC) {
   1034     default: llvm_unreachable("Unknown condition code");
   1035     case 0: Pred = ICmpInst::ICMP_EQ;  break;
   1036     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
   1037     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
   1038     case 4: Pred = ICmpInst::ICMP_NE;  break;
   1039     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
   1040     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
   1041     }
   1042     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
   1043   }
   1044 
   1045   Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
   1046 
   1047   return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
   1048 }
   1049 
   1050 // Replace a masked intrinsic with an older unmasked intrinsic.
   1051 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
   1052                                     Intrinsic::ID IID) {
   1053   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
   1054   Value *Rep = Builder.CreateCall(Intrin,
   1055                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
   1056   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
   1057 }
   1058 
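// Upgrade an avx512.mask.move.s* intrinsic: if bit 0 of the mask is set, take
// element 0 of B, otherwise element 0 of Src, and insert the result into
// element 0 of A.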
   1059 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
   1060   Value* A = CI.getArgOperand(0);
   1061   Value* B = CI.getArgOperand(1);
   1062   Value* Src = CI.getArgOperand(2);
   1063   Value* Mask = CI.getArgOperand(3);
   1064 
   1065   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
   1066   Value* Cmp = Builder.CreateIsNotNull(AndNode);
   1067   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
   1068   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
   1069   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
   1070   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
   1071 }
   1072 
   1073 
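// Upgrade an avx512.cvtmask2* intrinsic: expand the integer mask to a vector
// of i1 and sign-extend it so each set bit becomes an all-ones lane.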
   1074 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
   1075   Value* Op = CI.getArgOperand(0);
   1076   Type* ReturnOp = CI.getType();
   1077   unsigned NumElts = CI.getType()->getVectorNumElements();
   1078   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
   1079   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
   1080 }
   1081 
   1082 // Replace intrinsic with unmasked version and a select.
   1083 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
   1084                                       CallInst &CI, Value *&Rep) {
   1085   Name = Name.substr(12); // Remove avx512.mask.
   1086 
   1087   unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
   1088   unsigned EltWidth = CI.getType()->getScalarSizeInBits();
   1089   Intrinsic::ID IID;
   1090   if (Name.startswith("max.p")) {
   1091     if (VecWidth == 128 && EltWidth == 32)
   1092       IID = Intrinsic::x86_sse_max_ps;
   1093     else if (VecWidth == 128 && EltWidth == 64)
   1094       IID = Intrinsic::x86_sse2_max_pd;
   1095     else if (VecWidth == 256 && EltWidth == 32)
   1096       IID = Intrinsic::x86_avx_max_ps_256;
   1097     else if (VecWidth == 256 && EltWidth == 64)
   1098       IID = Intrinsic::x86_avx_max_pd_256;
   1099     else
   1100       llvm_unreachable("Unexpected intrinsic");
   1101   } else if (Name.startswith("min.p")) {
   1102     if (VecWidth == 128 && EltWidth == 32)
   1103       IID = Intrinsic::x86_sse_min_ps;
   1104     else if (VecWidth == 128 && EltWidth == 64)
   1105       IID = Intrinsic::x86_sse2_min_pd;
   1106     else if (VecWidth == 256 && EltWidth == 32)
   1107       IID = Intrinsic::x86_avx_min_ps_256;
   1108     else if (VecWidth == 256 && EltWidth == 64)
   1109       IID = Intrinsic::x86_avx_min_pd_256;
   1110     else
   1111       llvm_unreachable("Unexpected intrinsic");
   1112   } else if (Name.startswith("pshuf.b.")) {
   1113     if (VecWidth == 128)
   1114       IID = Intrinsic::x86_ssse3_pshuf_b_128;
   1115     else if (VecWidth == 256)
   1116       IID = Intrinsic::x86_avx2_pshuf_b;
   1117     else if (VecWidth == 512)
   1118       IID = Intrinsic::x86_avx512_pshuf_b_512;
   1119     else
   1120       llvm_unreachable("Unexpected intrinsic");
   1121   } else if (Name.startswith("pmul.hr.sw.")) {
   1122     if (VecWidth == 128)
   1123       IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
   1124     else if (VecWidth == 256)
   1125       IID = Intrinsic::x86_avx2_pmul_hr_sw;
   1126     else if (VecWidth == 512)
   1127       IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
   1128     else
   1129       llvm_unreachable("Unexpected intrinsic");
   1130   } else if (Name.startswith("pmulh.w.")) {
   1131     if (VecWidth == 128)
   1132       IID = Intrinsic::x86_sse2_pmulh_w;
   1133     else if (VecWidth == 256)
   1134       IID = Intrinsic::x86_avx2_pmulh_w;
   1135     else if (VecWidth == 512)
   1136       IID = Intrinsic::x86_avx512_pmulh_w_512;
   1137     else
   1138       llvm_unreachable("Unexpected intrinsic");
   1139   } else if (Name.startswith("pmulhu.w.")) {
   1140     if (VecWidth == 128)
   1141       IID = Intrinsic::x86_sse2_pmulhu_w;
   1142     else if (VecWidth == 256)
   1143       IID = Intrinsic::x86_avx2_pmulhu_w;
   1144     else if (VecWidth == 512)
   1145       IID = Intrinsic::x86_avx512_pmulhu_w_512;
   1146     else
   1147       llvm_unreachable("Unexpected intrinsic");
   1148   } else if (Name.startswith("pmaddw.d.")) {
   1149     if (VecWidth == 128)
   1150       IID = Intrinsic::x86_sse2_pmadd_wd;
   1151     else if (VecWidth == 256)
   1152       IID = Intrinsic::x86_avx2_pmadd_wd;
   1153     else if (VecWidth == 512)
   1154       IID = Intrinsic::x86_avx512_pmaddw_d_512;
   1155     else
   1156       llvm_unreachable("Unexpected intrinsic");
   1157   } else if (Name.startswith("pmaddubs.w.")) {
   1158     if (VecWidth == 128)
   1159       IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
   1160     else if (VecWidth == 256)
   1161       IID = Intrinsic::x86_avx2_pmadd_ub_sw;
   1162     else if (VecWidth == 512)
   1163       IID = Intrinsic::x86_avx512_pmaddubs_w_512;
   1164     else
   1165       llvm_unreachable("Unexpected intrinsic");
   1166   } else if (Name.startswith("packsswb.")) {
   1167     if (VecWidth == 128)
   1168       IID = Intrinsic::x86_sse2_packsswb_128;
   1169     else if (VecWidth == 256)
   1170       IID = Intrinsic::x86_avx2_packsswb;
   1171     else if (VecWidth == 512)
   1172       IID = Intrinsic::x86_avx512_packsswb_512;
   1173     else
   1174       llvm_unreachable("Unexpected intrinsic");
   1175   } else if (Name.startswith("packssdw.")) {
   1176     if (VecWidth == 128)
   1177       IID = Intrinsic::x86_sse2_packssdw_128;
   1178     else if (VecWidth == 256)
   1179       IID = Intrinsic::x86_avx2_packssdw;
   1180     else if (VecWidth == 512)
   1181       IID = Intrinsic::x86_avx512_packssdw_512;
   1182     else
   1183       llvm_unreachable("Unexpected intrinsic");
   1184   } else if (Name.startswith("packuswb.")) {
   1185     if (VecWidth == 128)
   1186       IID = Intrinsic::x86_sse2_packuswb_128;
   1187     else if (VecWidth == 256)
   1188       IID = Intrinsic::x86_avx2_packuswb;
   1189     else if (VecWidth == 512)
   1190       IID = Intrinsic::x86_avx512_packuswb_512;
   1191     else
   1192       llvm_unreachable("Unexpected intrinsic");
   1193   } else if (Name.startswith("packusdw.")) {
   1194     if (VecWidth == 128)
   1195       IID = Intrinsic::x86_sse41_packusdw;
   1196     else if (VecWidth == 256)
   1197       IID = Intrinsic::x86_avx2_packusdw;
   1198     else if (VecWidth == 512)
   1199       IID = Intrinsic::x86_avx512_packusdw_512;
   1200     else
   1201       llvm_unreachable("Unexpected intrinsic");
   1202   } else if (Name.startswith("vpermilvar.")) {
   1203     if (VecWidth == 128 && EltWidth == 32)
   1204       IID = Intrinsic::x86_avx_vpermilvar_ps;
   1205     else if (VecWidth == 128 && EltWidth == 64)
   1206       IID = Intrinsic::x86_avx_vpermilvar_pd;
   1207     else if (VecWidth == 256 && EltWidth == 32)
   1208       IID = Intrinsic::x86_avx_vpermilvar_ps_256;
   1209     else if (VecWidth == 256 && EltWidth == 64)
   1210       IID = Intrinsic::x86_avx_vpermilvar_pd_256;
   1211     else if (VecWidth == 512 && EltWidth == 32)
   1212       IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
   1213     else if (VecWidth == 512 && EltWidth == 64)
   1214       IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
   1215     else
   1216       llvm_unreachable("Unexpected intrinsic");
   1217   } else if (Name == "cvtpd2dq.256") {
   1218     IID = Intrinsic::x86_avx_cvt_pd2dq_256;
   1219   } else if (Name == "cvtpd2ps.256") {
   1220     IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
   1221   } else if (Name == "cvttpd2dq.256") {
   1222     IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
   1223   } else if (Name == "cvttps2dq.128") {
   1224     IID = Intrinsic::x86_sse2_cvttps2dq;
   1225   } else if (Name == "cvttps2dq.256") {
   1226     IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
   1227   } else if (Name.startswith("permvar.")) {
   1228     bool IsFloat = CI.getType()->isFPOrFPVectorTy();
   1229     if (VecWidth == 256 && EltWidth == 32 && IsFloat)
   1230       IID = Intrinsic::x86_avx2_permps;
   1231     else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
   1232       IID = Intrinsic::x86_avx2_permd;
   1233     else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
   1234       IID = Intrinsic::x86_avx512_permvar_df_256;
   1235     else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
   1236       IID = Intrinsic::x86_avx512_permvar_di_256;
   1237     else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
   1238       IID = Intrinsic::x86_avx512_permvar_sf_512;
   1239     else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
   1240       IID = Intrinsic::x86_avx512_permvar_si_512;
   1241     else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
   1242       IID = Intrinsic::x86_avx512_permvar_df_512;
   1243     else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
   1244       IID = Intrinsic::x86_avx512_permvar_di_512;
   1245     else if (VecWidth == 128 && EltWidth == 16)
   1246       IID = Intrinsic::x86_avx512_permvar_hi_128;
   1247     else if (VecWidth == 256 && EltWidth == 16)
   1248       IID = Intrinsic::x86_avx512_permvar_hi_256;
   1249     else if (VecWidth == 512 && EltWidth == 16)
   1250       IID = Intrinsic::x86_avx512_permvar_hi_512;
   1251     else if (VecWidth == 128 && EltWidth == 8)
   1252       IID = Intrinsic::x86_avx512_permvar_qi_128;
   1253     else if (VecWidth == 256 && EltWidth == 8)
   1254       IID = Intrinsic::x86_avx512_permvar_qi_256;
   1255     else if (VecWidth == 512 && EltWidth == 8)
   1256       IID = Intrinsic::x86_avx512_permvar_qi_512;
   1257     else
   1258       llvm_unreachable("Unexpected intrinsic");
   1259   } else if (Name.startswith("dbpsadbw.")) {
   1260     if (VecWidth == 128)
   1261       IID = Intrinsic::x86_avx512_dbpsadbw_128;
   1262     else if (VecWidth == 256)
   1263       IID = Intrinsic::x86_avx512_dbpsadbw_256;
   1264     else if (VecWidth == 512)
   1265       IID = Intrinsic::x86_avx512_dbpsadbw_512;
   1266     else
   1267       llvm_unreachable("Unexpected intrinsic");
   1268   } else if (Name.startswith("vpshld.")) {
   1269     if (VecWidth == 128 && Name[7] == 'q')
   1270       IID = Intrinsic::x86_avx512_vpshld_q_128;
   1271     else if (VecWidth == 128 && Name[7] == 'd')
   1272       IID = Intrinsic::x86_avx512_vpshld_d_128;
   1273     else if (VecWidth == 128 && Name[7] == 'w')
   1274       IID = Intrinsic::x86_avx512_vpshld_w_128;
   1275     else if (VecWidth == 256 && Name[7] == 'q')
   1276       IID = Intrinsic::x86_avx512_vpshld_q_256;
   1277     else if (VecWidth == 256 && Name[7] == 'd')
   1278       IID = Intrinsic::x86_avx512_vpshld_d_256;
   1279     else if (VecWidth == 256 && Name[7] == 'w')
   1280       IID = Intrinsic::x86_avx512_vpshld_w_256;
   1281     else if (VecWidth == 512 && Name[7] == 'q')
   1282       IID = Intrinsic::x86_avx512_vpshld_q_512;
   1283     else if (VecWidth == 512 && Name[7] == 'd')
   1284       IID = Intrinsic::x86_avx512_vpshld_d_512;
   1285     else if (VecWidth == 512 && Name[7] == 'w')
   1286       IID = Intrinsic::x86_avx512_vpshld_w_512;
   1287     else
   1288       llvm_unreachable("Unexpected intrinsic");
   1289   } else if (Name.startswith("vpshrd.")) {
   1290     if (VecWidth == 128 && Name[7] == 'q')
   1291       IID = Intrinsic::x86_avx512_vpshrd_q_128;
   1292     else if (VecWidth == 128 && Name[7] == 'd')
   1293       IID = Intrinsic::x86_avx512_vpshrd_d_128;
   1294     else if (VecWidth == 128 && Name[7] == 'w')
   1295       IID = Intrinsic::x86_avx512_vpshrd_w_128;
   1296     else if (VecWidth == 256 && Name[7] == 'q')
   1297       IID = Intrinsic::x86_avx512_vpshrd_q_256;
   1298     else if (VecWidth == 256 && Name[7] == 'd')
   1299       IID = Intrinsic::x86_avx512_vpshrd_d_256;
   1300     else if (VecWidth == 256 && Name[7] == 'w')
   1301       IID = Intrinsic::x86_avx512_vpshrd_w_256;
   1302     else if (VecWidth == 512 && Name[7] == 'q')
   1303       IID = Intrinsic::x86_avx512_vpshrd_q_512;
   1304     else if (VecWidth == 512 && Name[7] == 'd')
   1305       IID = Intrinsic::x86_avx512_vpshrd_d_512;
   1306     else if (VecWidth == 512 && Name[7] == 'w')
   1307       IID = Intrinsic::x86_avx512_vpshrd_w_512;
   1308     else
   1309       llvm_unreachable("Unexpected intrinsic");
   1310   } else if (Name.startswith("prorv.")) {
   1311     if (VecWidth == 128 && EltWidth == 32)
   1312       IID = Intrinsic::x86_avx512_prorv_d_128;
   1313     else if (VecWidth == 256 && EltWidth == 32)
   1314       IID = Intrinsic::x86_avx512_prorv_d_256;
   1315     else if (VecWidth == 512 && EltWidth == 32)
   1316       IID = Intrinsic::x86_avx512_prorv_d_512;
   1317     else if (VecWidth == 128 && EltWidth == 64)
   1318       IID = Intrinsic::x86_avx512_prorv_q_128;
   1319     else if (VecWidth == 256 && EltWidth == 64)
   1320       IID = Intrinsic::x86_avx512_prorv_q_256;
   1321     else if (VecWidth == 512 && EltWidth == 64)
   1322       IID = Intrinsic::x86_avx512_prorv_q_512;
   1323     else
   1324       llvm_unreachable("Unexpected intrinsic");
   1325   } else if (Name.startswith("prolv.")) {
   1326     if (VecWidth == 128 && EltWidth == 32)
   1327       IID = Intrinsic::x86_avx512_prolv_d_128;
   1328     else if (VecWidth == 256 && EltWidth == 32)
   1329       IID = Intrinsic::x86_avx512_prolv_d_256;
   1330     else if (VecWidth == 512 && EltWidth == 32)
   1331       IID = Intrinsic::x86_avx512_prolv_d_512;
   1332     else if (VecWidth == 128 && EltWidth == 64)
   1333       IID = Intrinsic::x86_avx512_prolv_q_128;
   1334     else if (VecWidth == 256 && EltWidth == 64)
   1335       IID = Intrinsic::x86_avx512_prolv_q_256;
   1336     else if (VecWidth == 512 && EltWidth == 64)
   1337       IID = Intrinsic::x86_avx512_prolv_q_512;
   1338     else
   1339       llvm_unreachable("Unexpected intrinsic");
   1340   } else if (Name.startswith("pror.")) {
   1341     if (VecWidth == 128 && EltWidth == 32)
   1342       IID = Intrinsic::x86_avx512_pror_d_128;
   1343     else if (VecWidth == 256 && EltWidth == 32)
   1344       IID = Intrinsic::x86_avx512_pror_d_256;
   1345     else if (VecWidth == 512 && EltWidth == 32)
   1346       IID = Intrinsic::x86_avx512_pror_d_512;
   1347     else if (VecWidth == 128 && EltWidth == 64)
   1348       IID = Intrinsic::x86_avx512_pror_q_128;
   1349     else if (VecWidth == 256 && EltWidth == 64)
   1350       IID = Intrinsic::x86_avx512_pror_q_256;
   1351     else if (VecWidth == 512 && EltWidth == 64)
   1352       IID = Intrinsic::x86_avx512_pror_q_512;
   1353     else
   1354       llvm_unreachable("Unexpected intrinsic");
   1355   } else if (Name.startswith("prol.")) {
   1356     if (VecWidth == 128 && EltWidth == 32)
   1357       IID = Intrinsic::x86_avx512_prol_d_128;
   1358     else if (VecWidth == 256 && EltWidth == 32)
   1359       IID = Intrinsic::x86_avx512_prol_d_256;
   1360     else if (VecWidth == 512 && EltWidth == 32)
   1361       IID = Intrinsic::x86_avx512_prol_d_512;
   1362     else if (VecWidth == 128 && EltWidth == 64)
   1363       IID = Intrinsic::x86_avx512_prol_q_128;
   1364     else if (VecWidth == 256 && EltWidth == 64)
   1365       IID = Intrinsic::x86_avx512_prol_q_256;
   1366     else if (VecWidth == 512 && EltWidth == 64)
   1367       IID = Intrinsic::x86_avx512_prol_q_512;
   1368     else
   1369       llvm_unreachable("Unexpected intrinsic");
   1370   } else
   1371     return false;
   1372 
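           // Every masked form handled above carries the passthru vector and the
           // mask as its last two operands. Drop them, call the unmasked
           // intrinsic, then re-apply the mask with a select.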
   1373   SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
   1374                                CI.arg_operands().end());
   1375   Args.pop_back();
   1376   Args.pop_back();
   1377   Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
   1378                            Args);
   1379   unsigned NumArgs = CI.getNumArgOperands();
   1380   Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
   1381                       CI.getArgOperand(NumArgs - 2));
   1382   return true;
   1383 }
   1384 
    1385 /// Upgrade the comment in a call to inline asm that represents an Objective-C
    1386 /// retain/release marker.
   1387 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
   1388   size_t Pos;
   1389   if (AsmStr->find("mov\tfp") == 0 &&
   1390       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
   1391       (Pos = AsmStr->find("# marker")) != std::string::npos) {
   1392     AsmStr->replace(Pos, 1, ";");
   1393   }
   1394   return;
   1395 }
   1396 
   1397 /// Upgrade a call to an old intrinsic. All argument and return casting must be
   1398 /// provided to seamlessly integrate with existing context.
   1399 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   1400   Function *F = CI->getCalledFunction();
   1401   LLVMContext &C = CI->getContext();
   1402   IRBuilder<> Builder(C);
   1403   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
   1404 
   1405   assert(F && "Intrinsic call is not direct?");
   1406 
   1407   if (!NewFn) {
   1408     // Get the Function's name.
   1409     StringRef Name = F->getName();
   1410 
   1411     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
   1412     Name = Name.substr(5);
   1413 
   1414     bool IsX86 = Name.startswith("x86.");
   1415     if (IsX86)
   1416       Name = Name.substr(4);
   1417     bool IsNVVM = Name.startswith("nvvm.");
   1418     if (IsNVVM)
   1419       Name = Name.substr(5);
   1420 
   1421     if (IsX86 && Name.startswith("sse4a.movnt.")) {
   1422       Module *M = F->getParent();
   1423       SmallVector<Metadata *, 1> Elts;
   1424       Elts.push_back(
   1425           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
   1426       MDNode *Node = MDNode::get(C, Elts);
   1427 
   1428       Value *Arg0 = CI->getArgOperand(0);
   1429       Value *Arg1 = CI->getArgOperand(1);
   1430 
   1431       // Nontemporal (unaligned) store of the 0'th element of the float/double
   1432       // vector.
   1433       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
   1434       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
   1435       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
   1436       Value *Extract =
   1437           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
   1438 
   1439       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
   1440       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
   1441 
   1442       // Remove intrinsic.
   1443       CI->eraseFromParent();
   1444       return;
   1445     }
   1446 
   1447     if (IsX86 && (Name.startswith("avx.movnt.") ||
   1448                   Name.startswith("avx512.storent."))) {
   1449       Module *M = F->getParent();
   1450       SmallVector<Metadata *, 1> Elts;
   1451       Elts.push_back(
   1452           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
   1453       MDNode *Node = MDNode::get(C, Elts);
   1454 
   1455       Value *Arg0 = CI->getArgOperand(0);
   1456       Value *Arg1 = CI->getArgOperand(1);
   1457 
   1458       // Convert the type of the pointer to a pointer to the stored type.
   1459       Value *BC = Builder.CreateBitCast(Arg0,
   1460                                         PointerType::getUnqual(Arg1->getType()),
   1461                                         "cast");
   1462       VectorType *VTy = cast<VectorType>(Arg1->getType());
   1463       StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
   1464                                                  VTy->getBitWidth() / 8);
   1465       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
   1466 
   1467       // Remove intrinsic.
   1468       CI->eraseFromParent();
   1469       return;
   1470     }
   1471 
   1472     if (IsX86 && Name == "sse2.storel.dq") {
   1473       Value *Arg0 = CI->getArgOperand(0);
   1474       Value *Arg1 = CI->getArgOperand(1);
   1475 
   1476       Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
   1477       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
   1478       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
   1479       Value *BC = Builder.CreateBitCast(Arg0,
   1480                                         PointerType::getUnqual(Elt->getType()),
   1481                                         "cast");
   1482       Builder.CreateAlignedStore(Elt, BC, 1);
   1483 
   1484       // Remove intrinsic.
   1485       CI->eraseFromParent();
   1486       return;
   1487     }
   1488 
   1489     if (IsX86 && (Name.startswith("sse.storeu.") ||
   1490                   Name.startswith("sse2.storeu.") ||
   1491                   Name.startswith("avx.storeu."))) {
   1492       Value *Arg0 = CI->getArgOperand(0);
   1493       Value *Arg1 = CI->getArgOperand(1);
   1494 
   1495       Arg0 = Builder.CreateBitCast(Arg0,
   1496                                    PointerType::getUnqual(Arg1->getType()),
   1497                                    "cast");
   1498       Builder.CreateAlignedStore(Arg1, Arg0, 1);
   1499 
   1500       // Remove intrinsic.
   1501       CI->eraseFromParent();
   1502       return;
   1503     }
   1504 
   1505     if (IsX86 && Name == "avx512.mask.store.ss") {
   1506       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
   1507       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
   1508                          Mask, false);
   1509 
   1510       // Remove intrinsic.
   1511       CI->eraseFromParent();
   1512       return;
   1513     }
   1514 
   1515     if (IsX86 && (Name.startswith("avx512.mask.store"))) {
   1516       // "avx512.mask.storeu." or "avx512.mask.store."
   1517       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
   1518       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
   1519                          CI->getArgOperand(2), Aligned);
   1520 
   1521       // Remove intrinsic.
   1522       CI->eraseFromParent();
   1523       return;
   1524     }
   1525 
   1526     Value *Rep;
   1527     // Upgrade packed integer vector compare intrinsics to compare instructions.
   1528     if (IsX86 && (Name.startswith("sse2.pcmp") ||
   1529                   Name.startswith("avx2.pcmp"))) {
   1530       // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
   1531       bool CmpEq = Name[9] == 'e';
   1532       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
   1533                                CI->getArgOperand(0), CI->getArgOperand(1));
   1534       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
   1535     } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
   1536       Type *ExtTy = Type::getInt32Ty(C);
   1537       if (CI->getOperand(0)->getType()->isIntegerTy(8))
   1538         ExtTy = Type::getInt64Ty(C);
   1539       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
   1540                          ExtTy->getPrimitiveSizeInBits();
   1541       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
   1542       Rep = Builder.CreateVectorSplat(NumElts, Rep);
   1543     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
   1544                          Name == "sse2.sqrt.sd")) {
   1545       Value *Vec = CI->getArgOperand(0);
   1546       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
   1547       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
   1548                                                  Intrinsic::sqrt, Elt0->getType());
   1549       Elt0 = Builder.CreateCall(Intr, Elt0);
   1550       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
   1551     } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
   1552                          Name.startswith("sse2.sqrt.p") ||
   1553                          Name.startswith("sse.sqrt.p"))) {
   1554       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
   1555                                                          Intrinsic::sqrt,
   1556                                                          CI->getType()),
   1557                                {CI->getArgOperand(0)});
   1558     } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
   1559       if (CI->getNumArgOperands() == 4 &&
   1560           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
   1561            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
   1562         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
   1563                                             : Intrinsic::x86_avx512_sqrt_pd_512;
   1564 
   1565         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
   1566         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
   1567                                                            IID), Args);
   1568       } else {
   1569         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
   1570                                                            Intrinsic::sqrt,
   1571                                                            CI->getType()),
   1572                                  {CI->getArgOperand(0)});
   1573       }
   1574       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
   1575                           CI->getArgOperand(1));
   1576     } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
   1577                          Name.startswith("avx512.ptestnm"))) {
   1578       Value *Op0 = CI->getArgOperand(0);
   1579       Value *Op1 = CI->getArgOperand(1);
   1580       Value *Mask = CI->getArgOperand(2);
   1581       Rep = Builder.CreateAnd(Op0, Op1);
   1582       llvm::Type *Ty = Op0->getType();
   1583       Value *Zero = llvm::Constant::getNullValue(Ty);
   1584       ICmpInst::Predicate Pred =
   1585         Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
   1586       Rep = Builder.CreateICmp(Pred, Rep, Zero);
   1587       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
   1588     } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
   1589       unsigned NumElts =
   1590           CI->getArgOperand(1)->getType()->getVectorNumElements();
   1591       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
   1592       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
   1593                           CI->getArgOperand(1));
   1594     } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
   1595       unsigned NumElts = CI->getType()->getScalarSizeInBits();
   1596       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
   1597       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
   1598       uint32_t Indices[64];
   1599       for (unsigned i = 0; i != NumElts; ++i)
   1600         Indices[i] = i;
   1601 
   1602       // First extract half of each vector. This gives better codegen than
   1603       // doing it in a single shuffle.
   1604       LHS = Builder.CreateShuffleVector(LHS, LHS,
   1605                                         makeArrayRef(Indices, NumElts / 2));
   1606       RHS = Builder.CreateShuffleVector(RHS, RHS,
   1607                                         makeArrayRef(Indices, NumElts / 2));
   1608       // Concat the vectors.
   1609       // NOTE: Operands have to be swapped to match intrinsic definition.
   1610       Rep = Builder.CreateShuffleVector(RHS, LHS,
   1611                                         makeArrayRef(Indices, NumElts));
   1612       Rep = Builder.CreateBitCast(Rep, CI->getType());
   1613     } else if (IsX86 && Name == "avx512.kand.w") {
   1614       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
   1615       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
   1616       Rep = Builder.CreateAnd(LHS, RHS);
   1617       Rep = Builder.CreateBitCast(Rep, CI->getType());
   1618     } else if (IsX86 && Name == "avx512.kandn.w") {
   1619       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
   1620       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
   1621       LHS = Builder.CreateNot(LHS);
   1622       Rep = Builder.CreateAnd(LHS, RHS);
   1623       Rep = Builder.CreateBitCast(Rep, CI->getType());
   1624     } else if (IsX86 && Name == "avx512.kor.w") {
   1625       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
   1626       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
   1627       Rep = Builder.CreateOr(LHS, RHS);
   1628       Rep = Builder.CreateBitCast(Rep, CI->getType());
   1629     } else if (IsX86 && Name == "avx512.kxor.w") {
   1630       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
   1631       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
   1632       Rep = Builder.CreateXor(LHS, RHS);
   1633       Rep = Builder.CreateBitCast(Rep, CI->getType());
   1634     } else if (IsX86 && Name == "avx512.kxnor.w") {
   1635       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
   1636       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
   1637       LHS = Builder.CreateNot(LHS);
   1638       Rep = Builder.CreateXor(LHS, RHS);
   1639       Rep = Builder.CreateBitCast(Rep, CI->getType());
   1640     } else if (IsX86 && Name == "avx512.knot.w") {
   1641       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
   1642       Rep = Builder.CreateNot(Rep);
   1643       Rep = Builder.CreateBitCast(Rep, CI->getType());
   1644     } else if (IsX86 &&
   1645                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
   1646       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
   1647       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
   1648       Rep = Builder.CreateOr(LHS, RHS);
   1649       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
   1650       Value *C;
   1651       if (Name[14] == 'c')
   1652         C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
   1653       else
   1654         C = ConstantInt::getNullValue(Builder.getInt16Ty());
   1655       Rep = Builder.CreateICmpEQ(Rep, C);
   1656       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
   1657     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
   1658       Type *I32Ty = Type::getInt32Ty(C);
   1659       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
   1660                                                  ConstantInt::get(I32Ty, 0));
   1661       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
   1662                                                  ConstantInt::get(I32Ty, 0));
   1663       Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
   1664                                         Builder.CreateFAdd(Elt0, Elt1),
   1665                                         ConstantInt::get(I32Ty, 0));
   1666     } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
   1667       Type *I32Ty = Type::getInt32Ty(C);
   1668       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
   1669                                                  ConstantInt::get(I32Ty, 0));
   1670       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
   1671                                                  ConstantInt::get(I32Ty, 0));
   1672       Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
   1673                                         Builder.CreateFSub(Elt0, Elt1),
   1674                                         ConstantInt::get(I32Ty, 0));
   1675     } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
   1676       Type *I32Ty = Type::getInt32Ty(C);
   1677       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
   1678                                                  ConstantInt::get(I32Ty, 0));
   1679       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
   1680                                                  ConstantInt::get(I32Ty, 0));
   1681       Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
   1682                                         Builder.CreateFMul(Elt0, Elt1),
   1683                                         ConstantInt::get(I32Ty, 0));
   1684     } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
   1685       Type *I32Ty = Type::getInt32Ty(C);
   1686       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
   1687                                                  ConstantInt::get(I32Ty, 0));
   1688       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
   1689                                                  ConstantInt::get(I32Ty, 0));
   1690       Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
   1691                                         Builder.CreateFDiv(Elt0, Elt1),
   1692                                         ConstantInt::get(I32Ty, 0));
   1693     } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
   1694       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
   1695       bool CmpEq = Name[16] == 'e';
   1696       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
   1697     } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
   1698       Type *OpTy = CI->getArgOperand(0)->getType();
   1699       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
   1700       unsigned EltWidth = OpTy->getScalarSizeInBits();
   1701       Intrinsic::ID IID;
   1702       if (VecWidth == 128 && EltWidth == 32)
   1703         IID = Intrinsic::x86_avx512_fpclass_ps_128;
   1704       else if (VecWidth == 256 && EltWidth == 32)
   1705         IID = Intrinsic::x86_avx512_fpclass_ps_256;
   1706       else if (VecWidth == 512 && EltWidth == 32)
   1707         IID = Intrinsic::x86_avx512_fpclass_ps_512;
   1708       else if (VecWidth == 128 && EltWidth == 64)
   1709         IID = Intrinsic::x86_avx512_fpclass_pd_128;
   1710       else if (VecWidth == 256 && EltWidth == 64)
   1711         IID = Intrinsic::x86_avx512_fpclass_pd_256;
   1712       else if (VecWidth == 512 && EltWidth == 64)
   1713         IID = Intrinsic::x86_avx512_fpclass_pd_512;
   1714       else
   1715         llvm_unreachable("Unexpected intrinsic");
   1716 
   1717       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
   1718                                { CI->getOperand(0), CI->getArgOperand(1) });
   1719       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
   1720     } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
   1721       Type *OpTy = CI->getArgOperand(0)->getType();
   1722       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
   1723       unsigned EltWidth = OpTy->getScalarSizeInBits();
   1724       Intrinsic::ID IID;
   1725       if (VecWidth == 128 && EltWidth == 32)
   1726         IID = Intrinsic::x86_avx512_cmp_ps_128;
   1727       else if (VecWidth == 256 && EltWidth == 32)
   1728         IID = Intrinsic::x86_avx512_cmp_ps_256;
   1729       else if (VecWidth == 512 && EltWidth == 32)
   1730         IID = Intrinsic::x86_avx512_cmp_ps_512;
   1731       else if (VecWidth == 128 && EltWidth == 64)
   1732         IID = Intrinsic::x86_avx512_cmp_pd_128;
   1733       else if (VecWidth == 256 && EltWidth == 64)
   1734         IID = Intrinsic::x86_avx512_cmp_pd_256;
   1735       else if (VecWidth == 512 && EltWidth == 64)
   1736         IID = Intrinsic::x86_avx512_cmp_pd_512;
   1737       else
   1738         llvm_unreachable("Unexpected intrinsic");
   1739 
   1740       SmallVector<Value *, 4> Args;
   1741       Args.push_back(CI->getArgOperand(0));
   1742       Args.push_back(CI->getArgOperand(1));
   1743       Args.push_back(CI->getArgOperand(2));
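               // The 512-bit variants carry an extra rounding/SAE operand at the
               // end; forward it to the new intrinsic when present. The mask
               // (operand 3) is applied separately below.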
   1744       if (CI->getNumArgOperands() == 5)
   1745         Args.push_back(CI->getArgOperand(4));
   1746 
   1747       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
   1748                                Args);
   1749       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
   1750     } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
   1751                Name[16] != 'p') {
   1752       // Integer compare intrinsics.
   1753       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
   1754       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
   1755     } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
   1756       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
   1757       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
   1758     } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
   1759                          Name.startswith("avx512.cvtw2mask.") ||
   1760                          Name.startswith("avx512.cvtd2mask.") ||
   1761                          Name.startswith("avx512.cvtq2mask."))) {
   1762       Value *Op = CI->getArgOperand(0);
   1763       Value *Zero = llvm::Constant::getNullValue(Op->getType());
   1764       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
   1765       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
    1766     } else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
    1767                          Name == "ssse3.pabs.w.128" ||
    1768                          Name == "ssse3.pabs.d.128" ||
    1769                          Name.startswith("avx2.pabs") ||
    1770                          Name.startswith("avx512.mask.pabs"))) {
   1771       Rep = upgradeAbs(Builder, *CI);
   1772     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
   1773                          Name == "sse2.pmaxs.w" ||
   1774                          Name == "sse41.pmaxsd" ||
   1775                          Name.startswith("avx2.pmaxs") ||
   1776                          Name.startswith("avx512.mask.pmaxs"))) {
   1777       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
   1778     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
   1779                          Name == "sse41.pmaxuw" ||
   1780                          Name == "sse41.pmaxud" ||
   1781                          Name.startswith("avx2.pmaxu") ||
   1782                          Name.startswith("avx512.mask.pmaxu"))) {
   1783       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
   1784     } else if (IsX86 && (Name == "sse41.pminsb" ||
   1785                          Name == "sse2.pmins.w" ||
   1786                          Name == "sse41.pminsd" ||
   1787                          Name.startswith("avx2.pmins") ||
   1788                          Name.startswith("avx512.mask.pmins"))) {
   1789       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
   1790     } else if (IsX86 && (Name == "sse2.pminu.b" ||
   1791                          Name == "sse41.pminuw" ||
   1792                          Name == "sse41.pminud" ||
   1793                          Name.startswith("avx2.pminu") ||
   1794                          Name.startswith("avx512.mask.pminu"))) {
   1795       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
   1796     } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
   1797                          Name == "avx2.pmulu.dq" ||
   1798                          Name == "avx512.pmulu.dq.512" ||
   1799                          Name.startswith("avx512.mask.pmulu.dq."))) {
   1800       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
   1801     } else if (IsX86 && (Name == "sse41.pmuldq" ||
   1802                          Name == "avx2.pmul.dq" ||
   1803                          Name == "avx512.pmul.dq.512" ||
   1804                          Name.startswith("avx512.mask.pmul.dq."))) {
   1805       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
   1806     } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
   1807                          Name == "sse2.cvtsi2sd" ||
   1808                          Name == "sse.cvtsi642ss" ||
   1809                          Name == "sse2.cvtsi642sd")) {
   1810       Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
   1811                                  CI->getType()->getVectorElementType());
   1812       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
   1813     } else if (IsX86 && Name == "avx512.cvtusi2sd") {
   1814       Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
   1815                                  CI->getType()->getVectorElementType());
   1816       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
   1817     } else if (IsX86 && Name == "sse2.cvtss2sd") {
   1818       Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
   1819       Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
   1820       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
   1821     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
   1822                          Name == "sse2.cvtdq2ps" ||
   1823                          Name == "avx.cvtdq2.pd.256" ||
   1824                          Name == "avx.cvtdq2.ps.256" ||
   1825                          Name.startswith("avx512.mask.cvtdq2pd.") ||
   1826                          Name.startswith("avx512.mask.cvtudq2pd.") ||
   1827                          Name == "avx512.mask.cvtdq2ps.128" ||
   1828                          Name == "avx512.mask.cvtdq2ps.256" ||
   1829                          Name == "avx512.mask.cvtudq2ps.128" ||
   1830                          Name == "avx512.mask.cvtudq2ps.256" ||
   1831                          Name == "avx512.mask.cvtqq2pd.128" ||
   1832                          Name == "avx512.mask.cvtqq2pd.256" ||
   1833                          Name == "avx512.mask.cvtuqq2pd.128" ||
   1834                          Name == "avx512.mask.cvtuqq2pd.256" ||
   1835                          Name == "sse2.cvtps2pd" ||
   1836                          Name == "avx.cvt.ps2.pd.256" ||
   1837                          Name == "avx512.mask.cvtps2pd.128" ||
   1838                          Name == "avx512.mask.cvtps2pd.256")) {
   1839       Type *DstTy = CI->getType();
   1840       Rep = CI->getArgOperand(0);
   1841 
   1842       unsigned NumDstElts = DstTy->getVectorNumElements();
   1843       if (NumDstElts < Rep->getType()->getVectorNumElements()) {
   1844         assert(NumDstElts == 2 && "Unexpected vector size");
   1845         uint32_t ShuffleMask[2] = { 0, 1 };
   1846         Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
   1847       }
   1848 
   1849       bool IsPS2PD = (StringRef::npos != Name.find("ps2"));
   1850       bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
   1851       if (IsPS2PD)
   1852         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
   1853       else if (IsUnsigned)
   1854         Rep = Builder.CreateUIToFP(Rep, DstTy, "cvt");
   1855       else
   1856         Rep = Builder.CreateSIToFP(Rep, DstTy, "cvt");
   1857 
   1858       if (CI->getNumArgOperands() == 3)
   1859         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
   1860                             CI->getArgOperand(1));
   1861     } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
   1862       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
   1863                               CI->getArgOperand(1), CI->getArgOperand(2),
   1864                               /*Aligned*/false);
   1865     } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
   1866       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
    1867                               CI->getArgOperand(1), CI->getArgOperand(2),
   1868                               /*Aligned*/true);
   1869     } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
   1870       Type *ResultTy = CI->getType();
   1871       Type *PtrTy = ResultTy->getVectorElementType();
   1872 
   1873       // Cast the pointer to element type.
   1874       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
   1875                                          llvm::PointerType::getUnqual(PtrTy));
   1876 
   1877       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
   1878                                      ResultTy->getVectorNumElements());
   1879 
   1880       Function *ELd = Intrinsic::getDeclaration(F->getParent(),
   1881                                                 Intrinsic::masked_expandload,
   1882                                                 ResultTy);
   1883       Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
   1884     } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
   1885       Type *ResultTy = CI->getArgOperand(1)->getType();
   1886       Type *PtrTy = ResultTy->getVectorElementType();
   1887 
   1888       // Cast the pointer to element type.
   1889       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
   1890                                          llvm::PointerType::getUnqual(PtrTy));
   1891 
   1892       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
   1893                                      ResultTy->getVectorNumElements());
   1894 
   1895       Function *CSt = Intrinsic::getDeclaration(F->getParent(),
   1896                                                 Intrinsic::masked_compressstore,
   1897                                                 ResultTy);
   1898       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
   1899     } else if (IsX86 && Name.startswith("xop.vpcom")) {
   1900       Intrinsic::ID intID;
   1901       if (Name.endswith("ub"))
   1902         intID = Intrinsic::x86_xop_vpcomub;
   1903       else if (Name.endswith("uw"))
   1904         intID = Intrinsic::x86_xop_vpcomuw;
   1905       else if (Name.endswith("ud"))
   1906         intID = Intrinsic::x86_xop_vpcomud;
   1907       else if (Name.endswith("uq"))
   1908         intID = Intrinsic::x86_xop_vpcomuq;
   1909       else if (Name.endswith("b"))
   1910         intID = Intrinsic::x86_xop_vpcomb;
   1911       else if (Name.endswith("w"))
   1912         intID = Intrinsic::x86_xop_vpcomw;
   1913       else if (Name.endswith("d"))
   1914         intID = Intrinsic::x86_xop_vpcomd;
   1915       else if (Name.endswith("q"))
   1916         intID = Intrinsic::x86_xop_vpcomq;
   1917       else
   1918         llvm_unreachable("Unknown suffix");
   1919 
   1920       Name = Name.substr(9); // strip off "xop.vpcom"
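               // Map the textual condition in the old name onto the XOP immediate
               // encoding: lt=0, le=1, gt=2, ge=3, eq=4, ne=5, false=6, true=7.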
   1921       unsigned Imm;
   1922       if (Name.startswith("lt"))
   1923         Imm = 0;
   1924       else if (Name.startswith("le"))
   1925         Imm = 1;
   1926       else if (Name.startswith("gt"))
   1927         Imm = 2;
   1928       else if (Name.startswith("ge"))
   1929         Imm = 3;
   1930       else if (Name.startswith("eq"))
   1931         Imm = 4;
   1932       else if (Name.startswith("ne"))
   1933         Imm = 5;
   1934       else if (Name.startswith("false"))
   1935         Imm = 6;
   1936       else if (Name.startswith("true"))
   1937         Imm = 7;
   1938       else
   1939         llvm_unreachable("Unknown condition");
   1940 
   1941       Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
   1942       Rep =
   1943           Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
   1944                                      Builder.getInt8(Imm)});
   1945     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
   1946       Value *Sel = CI->getArgOperand(2);
   1947       Value *NotSel = Builder.CreateNot(Sel);
   1948       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
   1949       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
   1950       Rep = Builder.CreateOr(Sel0, Sel1);
   1951     } else if (IsX86 && Name == "sse42.crc32.64.8") {
   1952       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
   1953                                                Intrinsic::x86_sse42_crc32_32_8);
   1954       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
   1955       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
   1956       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
   1957     } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
   1958                          Name.startswith("avx512.vbroadcast.s"))) {
   1959       // Replace broadcasts with a series of insertelements.
   1960       Type *VecTy = CI->getType();
   1961       Type *EltTy = VecTy->getVectorElementType();
   1962       unsigned EltNum = VecTy->getVectorNumElements();
   1963       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
   1964                                           EltTy->getPointerTo());
   1965       Value *Load = Builder.CreateLoad(EltTy, Cast);
   1966       Type *I32Ty = Type::getInt32Ty(C);
   1967       Rep = UndefValue::get(VecTy);
   1968       for (unsigned I = 0; I < EltNum; ++I)
   1969         Rep = Builder.CreateInsertElement(Rep, Load,
   1970                                           ConstantInt::get(I32Ty, I));
   1971     } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
   1972                          Name.startswith("sse41.pmovzx") ||
   1973                          Name.startswith("avx2.pmovsx") ||
   1974                          Name.startswith("avx2.pmovzx") ||
   1975                          Name.startswith("avx512.mask.pmovsx") ||
   1976                          Name.startswith("avx512.mask.pmovzx"))) {
   1977       VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
   1978       VectorType *DstTy = cast<VectorType>(CI->getType());
   1979       unsigned NumDstElts = DstTy->getNumElements();
   1980 
   1981       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
   1982       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
   1983       for (unsigned i = 0; i != NumDstElts; ++i)
   1984         ShuffleMask[i] = i;
   1985 
   1986       Value *SV = Builder.CreateShuffleVector(
   1987           CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
   1988 
   1989       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
   1990       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
   1991                    : Builder.CreateZExt(SV, DstTy);
   1992       // If there are 3 arguments, it's a masked intrinsic so we need a select.
   1993       if (CI->getNumArgOperands() == 3)
   1994         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
   1995                             CI->getArgOperand(1));
   1996     } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
   1997                          Name == "avx2.vbroadcasti128")) {
   1998       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
   1999       Type *EltTy = CI->getType()->getVectorElementType();
   2000       unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
   2001       Type *VT = VectorType::get(EltTy, NumSrcElts);
   2002       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
   2003                                             PointerType::getUnqual(VT));
   2004       Value *Load = Builder.CreateAlignedLoad(Op, 1);
   2005       if (NumSrcElts == 2)
   2006         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
   2007                                           { 0, 1, 0, 1 });
   2008       else
   2009         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
   2010                                           { 0, 1, 2, 3, 0, 1, 2, 3 });
   2011     } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
   2012                          Name.startswith("avx512.mask.shuf.f"))) {
   2013       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
   2014       Type *VT = CI->getType();
   2015       unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
   2016       unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
   2017       unsigned ControlBitsMask = NumLanes - 1;
   2018       unsigned NumControlBits = NumLanes / 2;
   2019       SmallVector<uint32_t, 8> ShuffleMask(0);
   2020 
   2021       for (unsigned l = 0; l != NumLanes; ++l) {
   2022         unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
    2023         // Destination lanes in the upper half select from the second source.
   2024         if (l >= NumLanes / 2)
   2025           LaneMask += NumLanes;
   2026         for (unsigned i = 0; i != NumElementsInLane; ++i)
   2027           ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
   2028       }
   2029       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
   2030                                         CI->getArgOperand(1), ShuffleMask);
   2031       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
   2032                           CI->getArgOperand(3));
    2033     } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
    2034                          Name.startswith("avx512.mask.broadcasti"))) {
   2035       unsigned NumSrcElts =
   2036                         CI->getArgOperand(0)->getType()->getVectorNumElements();
   2037       unsigned NumDstElts = CI->getType()->getVectorNumElements();
   2038 
   2039       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
   2040       for (unsigned i = 0; i != NumDstElts; ++i)
   2041         ShuffleMask[i] = i % NumSrcElts;
   2042 
   2043       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
   2044                                         CI->getArgOperand(0),
   2045                                         ShuffleMask);
   2046       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
   2047                           CI->getArgOperand(1));
   2048     } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
   2049                          Name.startswith("avx2.vbroadcast") ||
   2050                          Name.startswith("avx512.pbroadcast") ||
   2051                          Name.startswith("avx512.mask.broadcast.s"))) {
   2052       // Replace vp?broadcasts with a vector shuffle.
   2053       Value *Op = CI->getArgOperand(0);
   2054       unsigned NumElts = CI->getType()->getVectorNumElements();
   2055       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
   2056       Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
   2057                                         Constant::getNullValue(MaskTy));
   2058 
   2059       if (CI->getNumArgOperands() == 3)
   2060         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
   2061                             CI->getArgOperand(1));
   2062     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
   2063       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
   2064                                       CI->getArgOperand(1),
   2065                                       CI->getArgOperand(2),
   2066                                       CI->getArgOperand(3),
   2067                                       CI->getArgOperand(4),
   2068                                       false);
   2069     } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
   2070       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
   2071                                       CI->getArgOperand(1),
   2072                                       CI->getArgOperand(2),
   2073                                       CI->getArgOperand(3),
   2074                                       CI->getArgOperand(4),
   2075                                       true);
   2076     } else if (IsX86 && (Name == "sse2.psll.dq" ||
   2077                          Name == "avx2.psll.dq")) {
   2078       // 128/256-bit shift left specified in bits.
   2079       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
   2080       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
   2081                                        Shift / 8); // Shift is in bits.
   2082     } else if (IsX86 && (Name == "sse2.psrl.dq" ||
   2083                          Name == "avx2.psrl.dq")) {
   2084       // 128/256-bit shift right specified in bits.
   2085       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
   2086       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
   2087                                        Shift / 8); // Shift is in bits.
   2088     } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
   2089                          Name == "avx2.psll.dq.bs" ||
   2090                          Name == "avx512.psll.dq.512")) {
   2091       // 128/256/512-bit shift left specified in bytes.
   2092       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
   2093       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
   2094     } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
   2095                          Name == "avx2.psrl.dq.bs" ||
   2096                          Name == "avx512.psrl.dq.512")) {
   2097       // 128/256/512-bit shift right specified in bytes.
   2098       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
   2099       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
   2100     } else if (IsX86 && (Name == "sse41.pblendw" ||
   2101                          Name.startswith("sse41.blendp") ||
   2102                          Name.startswith("avx.blend.p") ||
   2103                          Name == "avx2.pblendw" ||
   2104                          Name.startswith("avx2.pblendd."))) {
   2105       Value *Op0 = CI->getArgOperand(0);
   2106       Value *Op1 = CI->getArgOperand(1);
    2107       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
   2108       VectorType *VecTy = cast<VectorType>(CI->getType());
   2109       unsigned NumElts = VecTy->getNumElements();
   2110 
   2111       SmallVector<uint32_t, 16> Idxs(NumElts);
   2112       for (unsigned i = 0; i != NumElts; ++i)
   2113         Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
   2114 
   2115       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
   2116     } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
   2117                          Name == "avx2.vinserti128" ||
   2118                          Name.startswith("avx512.mask.insert"))) {
   2119       Value *Op0 = CI->getArgOperand(0);
   2120       Value *Op1 = CI->getArgOperand(1);
   2121       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
   2122       unsigned DstNumElts = CI->getType()->getVectorNumElements();
   2123       unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
   2124       unsigned Scale = DstNumElts / SrcNumElts;
   2125 
   2126       // Mask off the high bits of the immediate value; hardware ignores those.
   2127       Imm = Imm % Scale;
   2128 
   2129       // Extend the second operand into a vector the size of the destination.
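               // For example (illustrative), with SrcNumElts = 4 and DstNumElts = 8
               // the widening mask is <0, 1, 2, 3, 4, 4, 4, 4>; indices >= 4 select
               // elements of the undef operand.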
   2130       Value *UndefV = UndefValue::get(Op1->getType());
   2131       SmallVector<uint32_t, 8> Idxs(DstNumElts);
   2132       for (unsigned i = 0; i != SrcNumElts; ++i)
   2133         Idxs[i] = i;
   2134       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
   2135         Idxs[i] = SrcNumElts;
   2136       Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
   2137 
   2138       // Insert the second operand into the first operand.
   2139 
   2140       // Note that there is no guarantee that instruction lowering will actually
   2141       // produce a vinsertf128 instruction for the created shuffles. In
   2142       // particular, the 0 immediate case involves no lane changes, so it can
   2143       // be handled as a blend.
   2144 
   2145       // Example of shuffle mask for 32-bit elements:
   2146       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
   2147       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
   2148 
    2149       // First fill with the identity mask.
   2150       for (unsigned i = 0; i != DstNumElts; ++i)
   2151         Idxs[i] = i;
   2152       // Then replace the elements where we need to insert.
   2153       for (unsigned i = 0; i != SrcNumElts; ++i)
   2154         Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
   2155       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
   2156 
   2157       // If the intrinsic has a mask operand, handle that.
   2158       if (CI->getNumArgOperands() == 5)
   2159         Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
   2160                             CI->getArgOperand(3));
   2161     } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
   2162                          Name == "avx2.vextracti128" ||
   2163                          Name.startswith("avx512.mask.vextract"))) {
   2164       Value *Op0 = CI->getArgOperand(0);
   2165       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
   2166       unsigned DstNumElts = CI->getType()->getVectorNumElements();
   2167       unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
   2168       unsigned Scale = SrcNumElts / DstNumElts;
   2169 
   2170       // Mask off the high bits of the immediate value; hardware ignores those.
   2171       Imm = Imm % Scale;
   2172 
   2173       // Get indexes for the subvector of the input vector.
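               // Illustrative example: extracting the upper half (Imm = 1) of an
               // 8-element source into a 4-element result uses the mask <4, 5, 6, 7>.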
   2174       SmallVector<uint32_t, 8> Idxs(DstNumElts);
   2175       for (unsigned i = 0; i != DstNumElts; ++i) {
   2176         Idxs[i] = i + (Imm * DstNumElts);
   2177       }
   2178       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
   2179 
   2180       // If the intrinsic has a mask operand, handle that.
   2181       if (CI->getNumArgOperands() == 4)
   2182         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2183                             CI->getArgOperand(2));
   2184     } else if (!IsX86 && Name == "stackprotectorcheck") {
   2185       Rep = nullptr;
   2186     } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
   2187                          Name.startswith("avx512.mask.perm.di."))) {
   2188       Value *Op0 = CI->getArgOperand(0);
   2189       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
   2190       VectorType *VecTy = cast<VectorType>(CI->getType());
   2191       unsigned NumElts = VecTy->getNumElements();
   2192 
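               // Each 2-bit immediate field selects an element within a group of 4.
               // Illustrative example: for 8 elements with Imm = 0x1B each group of
               // four is reversed, giving <3, 2, 1, 0, 7, 6, 5, 4>.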
   2193       SmallVector<uint32_t, 8> Idxs(NumElts);
   2194       for (unsigned i = 0; i != NumElts; ++i)
   2195         Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
   2196 
   2197       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
   2198 
   2199       if (CI->getNumArgOperands() == 4)
   2200         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2201                             CI->getArgOperand(2));
   2202     } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
   2203                          Name == "avx2.vperm2i128")) {
   2204       // The immediate permute control byte looks like this:
   2205       //    [1:0] - select 128 bits from sources for low half of destination
   2206       //    [2]   - ignore
   2207       //    [3]   - zero low half of destination
   2208       //    [5:4] - select 128 bits from sources for high half of destination
   2209       //    [6]   - ignore
   2210       //    [7]   - zero high half of destination
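               //
               // Illustrative example: for an 8-element result, Imm = 0x21 selects the
               // high half of the first source and the low half of the second, giving
               // the shuffle mask <4, 5, 6, 7, 8, 9, 10, 11>.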
   2211 
   2212       uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
   2213 
   2214       unsigned NumElts = CI->getType()->getVectorNumElements();
   2215       unsigned HalfSize = NumElts / 2;
   2216       SmallVector<uint32_t, 8> ShuffleMask(NumElts);
   2217 
   2218       // Determine which operand(s) are actually in use for this instruction.
   2219       Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
   2220       Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
   2221 
   2222       // If needed, replace operands based on zero mask.
   2223       V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
   2224       V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
   2225 
   2226       // Permute low half of result.
   2227       unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
   2228       for (unsigned i = 0; i < HalfSize; ++i)
   2229         ShuffleMask[i] = StartIndex + i;
   2230 
   2231       // Permute high half of result.
   2232       StartIndex = (Imm & 0x10) ? HalfSize : 0;
   2233       for (unsigned i = 0; i < HalfSize; ++i)
   2234         ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
   2235 
   2236       Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
   2237 
   2238     } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
   2239                          Name == "sse2.pshuf.d" ||
   2240                          Name.startswith("avx512.mask.vpermil.p") ||
   2241                          Name.startswith("avx512.mask.pshuf.d."))) {
   2242       Value *Op0 = CI->getArgOperand(0);
   2243       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
   2244       VectorType *VecTy = cast<VectorType>(CI->getType());
   2245       unsigned NumElts = VecTy->getNumElements();
   2246       // Calculate the size of each index in the immediate.
   2247       unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
   2248       unsigned IdxMask = ((1 << IdxSize) - 1);
   2249 
   2250       SmallVector<uint32_t, 8> Idxs(NumElts);
    2251       // Look up the bits for this element, wrapping around the immediate every
    2252       // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
    2253       // to offset by the first index of each group.
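               // Illustrative examples: pshufd on 4 x i32 with Imm = 0x1B (IdxSize = 2)
               // gives <3, 2, 1, 0>; vpermilpd uses IdxSize = 1, so each immediate bit
               // selects an element within its 2-element group.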
   2254       for (unsigned i = 0; i != NumElts; ++i)
   2255         Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
   2256 
   2257       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
   2258 
   2259       if (CI->getNumArgOperands() == 4)
   2260         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2261                             CI->getArgOperand(2));
   2262     } else if (IsX86 && (Name == "sse2.pshufl.w" ||
   2263                          Name.startswith("avx512.mask.pshufl.w."))) {
   2264       Value *Op0 = CI->getArgOperand(0);
   2265       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
   2266       unsigned NumElts = CI->getType()->getVectorNumElements();
   2267 
   2268       SmallVector<uint32_t, 16> Idxs(NumElts);
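               // The immediate shuffles the low four words of each 128-bit lane; the
               // high four words pass through. Illustrative example: Imm = 0x1B gives
               // <3, 2, 1, 0, 4, 5, 6, 7> in the first lane.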
   2269       for (unsigned l = 0; l != NumElts; l += 8) {
   2270         for (unsigned i = 0; i != 4; ++i)
   2271           Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
   2272         for (unsigned i = 4; i != 8; ++i)
   2273           Idxs[i + l] = i + l;
   2274       }
   2275 
   2276       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
   2277 
   2278       if (CI->getNumArgOperands() == 4)
   2279         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2280                             CI->getArgOperand(2));
   2281     } else if (IsX86 && (Name == "sse2.pshufh.w" ||
   2282                          Name.startswith("avx512.mask.pshufh.w."))) {
   2283       Value *Op0 = CI->getArgOperand(0);
   2284       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
   2285       unsigned NumElts = CI->getType()->getVectorNumElements();
   2286 
   2287       SmallVector<uint32_t, 16> Idxs(NumElts);
   2288       for (unsigned l = 0; l != NumElts; l += 8) {
   2289         for (unsigned i = 0; i != 4; ++i)
   2290           Idxs[i + l] = i + l;
   2291         for (unsigned i = 0; i != 4; ++i)
   2292           Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
   2293       }
   2294 
   2295       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
   2296 
   2297       if (CI->getNumArgOperands() == 4)
   2298         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2299                             CI->getArgOperand(2));
   2300     } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
   2301       Value *Op0 = CI->getArgOperand(0);
   2302       Value *Op1 = CI->getArgOperand(1);
   2303       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
   2304       unsigned NumElts = CI->getType()->getVectorNumElements();
   2305 
   2306       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
   2307       unsigned HalfLaneElts = NumLaneElts / 2;
   2308 
   2309       SmallVector<uint32_t, 16> Idxs(NumElts);
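               // Illustrative example: shufps on 4 x float with Imm = 0x44 selects
               // elements 0 and 1 from each source, giving the mask <0, 1, 4, 5>.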
   2310       for (unsigned i = 0; i != NumElts; ++i) {
   2311         // Base index is the starting element of the lane.
   2312         Idxs[i] = i - (i % NumLaneElts);
    2313         // If we are halfway through the lane, switch to the other source.
   2314         if ((i % NumLaneElts) >= HalfLaneElts)
   2315           Idxs[i] += NumElts;
    2316         // Now select the specific element by adding HalfLaneElts bits from
    2317         // the immediate, wrapping around the immediate every 8 bits.
   2318         Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
   2319       }
   2320 
   2321       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
   2322 
   2323       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
   2324                           CI->getArgOperand(3));
   2325     } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
   2326                          Name.startswith("avx512.mask.movshdup") ||
   2327                          Name.startswith("avx512.mask.movsldup"))) {
   2328       Value *Op0 = CI->getArgOperand(0);
   2329       unsigned NumElts = CI->getType()->getVectorNumElements();
   2330       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
   2331 
   2332       unsigned Offset = 0;
   2333       if (Name.startswith("avx512.mask.movshdup."))
   2334         Offset = 1;
   2335 
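               // Illustrative examples: for 4 x float, movsldup produces <0, 0, 2, 2>
               // and movshdup produces <1, 1, 3, 3>; movddup on 2 x double produces
               // <0, 0>.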
   2336       SmallVector<uint32_t, 16> Idxs(NumElts);
   2337       for (unsigned l = 0; l != NumElts; l += NumLaneElts)
   2338         for (unsigned i = 0; i != NumLaneElts; i += 2) {
   2339           Idxs[i + l + 0] = i + l + Offset;
   2340           Idxs[i + l + 1] = i + l + Offset;
   2341         }
   2342 
   2343       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
   2344 
   2345       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
   2346                           CI->getArgOperand(1));
   2347     } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
   2348                          Name.startswith("avx512.mask.unpckl."))) {
   2349       Value *Op0 = CI->getArgOperand(0);
   2350       Value *Op1 = CI->getArgOperand(1);
   2351       int NumElts = CI->getType()->getVectorNumElements();
   2352       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
   2353 
   2354       SmallVector<uint32_t, 64> Idxs(NumElts);
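               // Illustrative example: for 4 x i32 (one 128-bit lane) the unpcklo mask
               // is <0, 4, 1, 5>, interleaving the low halves of the two sources.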
   2355       for (int l = 0; l != NumElts; l += NumLaneElts)
   2356         for (int i = 0; i != NumLaneElts; ++i)
   2357           Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
   2358 
   2359       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
   2360 
   2361       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2362                           CI->getArgOperand(2));
   2363     } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
   2364                          Name.startswith("avx512.mask.unpckh."))) {
   2365       Value *Op0 = CI->getArgOperand(0);
   2366       Value *Op1 = CI->getArgOperand(1);
   2367       int NumElts = CI->getType()->getVectorNumElements();
   2368       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
   2369 
   2370       SmallVector<uint32_t, 64> Idxs(NumElts);
   2371       for (int l = 0; l != NumElts; l += NumLaneElts)
   2372         for (int i = 0; i != NumLaneElts; ++i)
   2373           Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
   2374 
   2375       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
   2376 
   2377       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2378                           CI->getArgOperand(2));
   2379     } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
   2380       Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
   2381       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2382                           CI->getArgOperand(2));
   2383     } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
   2384       Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
   2385                               CI->getArgOperand(1));
   2386       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2387                           CI->getArgOperand(2));
   2388     } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
   2389       Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
   2390       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2391                           CI->getArgOperand(2));
   2392     } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
   2393       Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
   2394       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2395                           CI->getArgOperand(2));
   2396     } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
   2397       VectorType *FTy = cast<VectorType>(CI->getType());
   2398       VectorType *ITy = VectorType::getInteger(FTy);
   2399       Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
   2400                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
   2401       Rep = Builder.CreateBitCast(Rep, FTy);
   2402       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2403                           CI->getArgOperand(2));
   2404     } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
   2405       VectorType *FTy = cast<VectorType>(CI->getType());
   2406       VectorType *ITy = VectorType::getInteger(FTy);
   2407       Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
   2408       Rep = Builder.CreateAnd(Rep,
   2409                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
   2410       Rep = Builder.CreateBitCast(Rep, FTy);
   2411       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2412                           CI->getArgOperand(2));
   2413     } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
   2414       VectorType *FTy = cast<VectorType>(CI->getType());
   2415       VectorType *ITy = VectorType::getInteger(FTy);
   2416       Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
   2417                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
   2418       Rep = Builder.CreateBitCast(Rep, FTy);
   2419       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2420                           CI->getArgOperand(2));
   2421     } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
   2422       VectorType *FTy = cast<VectorType>(CI->getType());
   2423       VectorType *ITy = VectorType::getInteger(FTy);
   2424       Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
   2425                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
   2426       Rep = Builder.CreateBitCast(Rep, FTy);
   2427       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2428                           CI->getArgOperand(2));
   2429     } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
   2430       Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
   2431       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2432                           CI->getArgOperand(2));
   2433     } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
   2434       Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
   2435       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2436                           CI->getArgOperand(2));
   2437     } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
   2438       Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
   2439       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2440                           CI->getArgOperand(2));
   2441     } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
   2442       if (Name.endswith(".512")) {
   2443         Intrinsic::ID IID;
   2444         if (Name[17] == 's')
   2445           IID = Intrinsic::x86_avx512_add_ps_512;
   2446         else
   2447           IID = Intrinsic::x86_avx512_add_pd_512;
   2448 
   2449         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
   2450                                  { CI->getArgOperand(0), CI->getArgOperand(1),
   2451                                    CI->getArgOperand(4) });
   2452       } else {
   2453         Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
   2454       }
   2455       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2456                           CI->getArgOperand(2));
   2457     } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
   2458       if (Name.endswith(".512")) {
   2459         Intrinsic::ID IID;
   2460         if (Name[17] == 's')
   2461           IID = Intrinsic::x86_avx512_div_ps_512;
   2462         else
   2463           IID = Intrinsic::x86_avx512_div_pd_512;
   2464 
   2465         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
   2466                                  { CI->getArgOperand(0), CI->getArgOperand(1),
   2467                                    CI->getArgOperand(4) });
   2468       } else {
   2469         Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
   2470       }
   2471       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2472                           CI->getArgOperand(2));
   2473     } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
   2474       if (Name.endswith(".512")) {
   2475         Intrinsic::ID IID;
   2476         if (Name[17] == 's')
   2477           IID = Intrinsic::x86_avx512_mul_ps_512;
   2478         else
   2479           IID = Intrinsic::x86_avx512_mul_pd_512;
   2480 
   2481         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
   2482                                  { CI->getArgOperand(0), CI->getArgOperand(1),
   2483                                    CI->getArgOperand(4) });
   2484       } else {
   2485         Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
   2486       }
   2487       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2488                           CI->getArgOperand(2));
   2489     } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
   2490       if (Name.endswith(".512")) {
   2491         Intrinsic::ID IID;
   2492         if (Name[17] == 's')
   2493           IID = Intrinsic::x86_avx512_sub_ps_512;
   2494         else
   2495           IID = Intrinsic::x86_avx512_sub_pd_512;
   2496 
   2497         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
   2498                                  { CI->getArgOperand(0), CI->getArgOperand(1),
   2499                                    CI->getArgOperand(4) });
   2500       } else {
   2501         Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
   2502       }
   2503       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2504                           CI->getArgOperand(2));
   2505     } else if (IsX86 && Name.startswith("avx512.mask.max.p") &&
   2506                Name.drop_front(18) == ".512") {
   2507       Intrinsic::ID IID;
   2508       if (Name[17] == 's')
   2509         IID = Intrinsic::x86_avx512_max_ps_512;
   2510       else
   2511         IID = Intrinsic::x86_avx512_max_pd_512;
   2512 
   2513       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
   2514                                { CI->getArgOperand(0), CI->getArgOperand(1),
   2515                                  CI->getArgOperand(4) });
   2516       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2517                           CI->getArgOperand(2));
   2518     } else if (IsX86 && Name.startswith("avx512.mask.min.p") &&
   2519                Name.drop_front(18) == ".512") {
   2520       Intrinsic::ID IID;
   2521       if (Name[17] == 's')
   2522         IID = Intrinsic::x86_avx512_min_ps_512;
   2523       else
   2524         IID = Intrinsic::x86_avx512_min_pd_512;
   2525 
   2526       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
   2527                                { CI->getArgOperand(0), CI->getArgOperand(1),
   2528                                  CI->getArgOperand(4) });
   2529       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2530                           CI->getArgOperand(2));
   2531     } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
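               // lzcnt is defined for a zero input (it returns the element width in
               // bits), so lower to ctlz with is_zero_undef set to false.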
   2532       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
   2533                                                          Intrinsic::ctlz,
   2534                                                          CI->getType()),
   2535                                { CI->getArgOperand(0), Builder.getInt1(false) });
   2536       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
   2537                           CI->getArgOperand(1));
   2538     } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
   2539       bool IsImmediate = Name[16] == 'i' ||
   2540                          (Name.size() > 18 && Name[18] == 'i');
   2541       bool IsVariable = Name[16] == 'v';
   2542       char Size = Name[16] == '.' ? Name[17] :
   2543                   Name[17] == '.' ? Name[18] :
   2544                   Name[18] == '.' ? Name[19] :
   2545                                     Name[20];
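               // Illustrative examples: "avx512.mask.psll.d.128" is the vector-count
               // form with Size 'd', "avx512.mask.psll.di.512" is an immediate form,
               // and "avx512.mask.psllv8.si" is a variable-shift form.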
   2546 
   2547       Intrinsic::ID IID;
   2548       if (IsVariable && Name[17] != '.') {
   2549         if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
   2550           IID = Intrinsic::x86_avx2_psllv_q;
   2551         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
   2552           IID = Intrinsic::x86_avx2_psllv_q_256;
   2553         else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
   2554           IID = Intrinsic::x86_avx2_psllv_d;
   2555         else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
   2556           IID = Intrinsic::x86_avx2_psllv_d_256;
   2557         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
   2558           IID = Intrinsic::x86_avx512_psllv_w_128;
   2559         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
   2560           IID = Intrinsic::x86_avx512_psllv_w_256;
   2561         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
   2562           IID = Intrinsic::x86_avx512_psllv_w_512;
   2563         else
   2564           llvm_unreachable("Unexpected size");
   2565       } else if (Name.endswith(".128")) {
   2566         if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
   2567           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
   2568                             : Intrinsic::x86_sse2_psll_d;
   2569         else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
   2570           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
   2571                             : Intrinsic::x86_sse2_psll_q;
   2572         else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
   2573           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
   2574                             : Intrinsic::x86_sse2_psll_w;
   2575         else
   2576           llvm_unreachable("Unexpected size");
   2577       } else if (Name.endswith(".256")) {
   2578         if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
   2579           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
   2580                             : Intrinsic::x86_avx2_psll_d;
   2581         else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
   2582           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
   2583                             : Intrinsic::x86_avx2_psll_q;
   2584         else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
   2585           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
   2586                             : Intrinsic::x86_avx2_psll_w;
   2587         else
   2588           llvm_unreachable("Unexpected size");
   2589       } else {
   2590         if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
   2591           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
   2592                 IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
   2593                               Intrinsic::x86_avx512_psll_d_512;
   2594         else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
   2595           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
   2596                 IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
   2597                               Intrinsic::x86_avx512_psll_q_512;
   2598         else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
   2599           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
   2600                             : Intrinsic::x86_avx512_psll_w_512;
   2601         else
   2602           llvm_unreachable("Unexpected size");
   2603       }
   2604 
   2605       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
   2606     } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
   2607       bool IsImmediate = Name[16] == 'i' ||
   2608                          (Name.size() > 18 && Name[18] == 'i');
   2609       bool IsVariable = Name[16] == 'v';
   2610       char Size = Name[16] == '.' ? Name[17] :
   2611                   Name[17] == '.' ? Name[18] :
   2612                   Name[18] == '.' ? Name[19] :
   2613                                     Name[20];
   2614 
   2615       Intrinsic::ID IID;
   2616       if (IsVariable && Name[17] != '.') {
   2617         if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
   2618           IID = Intrinsic::x86_avx2_psrlv_q;
   2619         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
   2620           IID = Intrinsic::x86_avx2_psrlv_q_256;
   2621         else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
   2622           IID = Intrinsic::x86_avx2_psrlv_d;
   2623         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
   2624           IID = Intrinsic::x86_avx2_psrlv_d_256;
   2625         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
   2626           IID = Intrinsic::x86_avx512_psrlv_w_128;
   2627         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
   2628           IID = Intrinsic::x86_avx512_psrlv_w_256;
   2629         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
   2630           IID = Intrinsic::x86_avx512_psrlv_w_512;
   2631         else
   2632           llvm_unreachable("Unexpected size");
   2633       } else if (Name.endswith(".128")) {
   2634         if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
   2635           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
   2636                             : Intrinsic::x86_sse2_psrl_d;
   2637         else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
   2638           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
   2639                             : Intrinsic::x86_sse2_psrl_q;
   2640         else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
   2641           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
   2642                             : Intrinsic::x86_sse2_psrl_w;
   2643         else
   2644           llvm_unreachable("Unexpected size");
   2645       } else if (Name.endswith(".256")) {
   2646         if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
   2647           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
   2648                             : Intrinsic::x86_avx2_psrl_d;
   2649         else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
   2650           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
   2651                             : Intrinsic::x86_avx2_psrl_q;
   2652         else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
   2653           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
   2654                             : Intrinsic::x86_avx2_psrl_w;
   2655         else
   2656           llvm_unreachable("Unexpected size");
   2657       } else {
   2658         if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
   2659           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
   2660                 IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
   2661                               Intrinsic::x86_avx512_psrl_d_512;
   2662         else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
   2663           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
   2664                 IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
   2665                               Intrinsic::x86_avx512_psrl_q_512;
    2666         else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
   2667           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
   2668                             : Intrinsic::x86_avx512_psrl_w_512;
   2669         else
   2670           llvm_unreachable("Unexpected size");
   2671       }
   2672 
   2673       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
   2674     } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
   2675       bool IsImmediate = Name[16] == 'i' ||
   2676                          (Name.size() > 18 && Name[18] == 'i');
   2677       bool IsVariable = Name[16] == 'v';
   2678       char Size = Name[16] == '.' ? Name[17] :
   2679                   Name[17] == '.' ? Name[18] :
   2680                   Name[18] == '.' ? Name[19] :
   2681                                     Name[20];
   2682 
   2683       Intrinsic::ID IID;
   2684       if (IsVariable && Name[17] != '.') {
   2685         if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
   2686           IID = Intrinsic::x86_avx2_psrav_d;
   2687         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
   2688           IID = Intrinsic::x86_avx2_psrav_d_256;
   2689         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
   2690           IID = Intrinsic::x86_avx512_psrav_w_128;
   2691         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
   2692           IID = Intrinsic::x86_avx512_psrav_w_256;
   2693         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
   2694           IID = Intrinsic::x86_avx512_psrav_w_512;
   2695         else
   2696           llvm_unreachable("Unexpected size");
   2697       } else if (Name.endswith(".128")) {
   2698         if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
   2699           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
   2700                             : Intrinsic::x86_sse2_psra_d;
   2701         else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
   2702           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
   2703                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
   2704                               Intrinsic::x86_avx512_psra_q_128;
   2705         else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
   2706           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
   2707                             : Intrinsic::x86_sse2_psra_w;
   2708         else
   2709           llvm_unreachable("Unexpected size");
   2710       } else if (Name.endswith(".256")) {
   2711         if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
   2712           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
   2713                             : Intrinsic::x86_avx2_psra_d;
   2714         else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
   2715           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
   2716                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
   2717                               Intrinsic::x86_avx512_psra_q_256;
   2718         else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
   2719           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
   2720                             : Intrinsic::x86_avx2_psra_w;
   2721         else
   2722           llvm_unreachable("Unexpected size");
   2723       } else {
   2724         if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
   2725           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
   2726                 IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
   2727                               Intrinsic::x86_avx512_psra_d_512;
   2728         else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
   2729           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
   2730                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
   2731                               Intrinsic::x86_avx512_psra_q_512;
   2732         else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
   2733           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
   2734                             : Intrinsic::x86_avx512_psra_w_512;
   2735         else
   2736           llvm_unreachable("Unexpected size");
   2737       }
   2738 
   2739       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
   2740     } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
   2741       Rep = upgradeMaskedMove(Builder, *CI);
   2742     } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
   2743       Rep = UpgradeMaskToInt(Builder, *CI);
   2744     } else if (IsX86 && Name.endswith(".movntdqa")) {
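               // Upgrade the non-temporal load intrinsics to an ordinary vector load
               // annotated with !nontemporal metadata.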
   2745       Module *M = F->getParent();
   2746       MDNode *Node = MDNode::get(
   2747           C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
   2748 
   2749       Value *Ptr = CI->getArgOperand(0);
   2750       VectorType *VTy = cast<VectorType>(CI->getType());
   2751 
   2752       // Convert the type of the pointer to a pointer to the stored type.
   2753       Value *BC =
   2754           Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
   2755       LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
   2756       LI->setMetadata(M->getMDKindID("nontemporal"), Node);
   2757       Rep = LI;
   2758     } else if (IsX86 &&
   2759                (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
   2760                 Name.startswith("avx512.mask.pavg"))) {
   2761       // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
   2762       // llvm.x86.avx512.mask.pavg.b/w
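               // Emulate the averaging instruction: zero-extend both operands, compute
               // (a + b + 1) >> 1 in the wider type, then truncate back.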
   2763       Value *A = CI->getArgOperand(0);
   2764       Value *B = CI->getArgOperand(1);
   2765       VectorType *ZextType = VectorType::getExtendedElementVectorType(
   2766           cast<VectorType>(A->getType()));
   2767       Value *ExtendedA = Builder.CreateZExt(A, ZextType);
   2768       Value *ExtendedB = Builder.CreateZExt(B, ZextType);
   2769       Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
   2770       Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
   2771       Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
   2772       Rep = Builder.CreateTrunc(ShiftR, A->getType());
   2773       if (CI->getNumArgOperands() > 2) {
   2774         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
   2775                             CI->getArgOperand(2));
   2776       }
   2777     } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
   2778                          Name.startswith("fma.vfmsub.") ||
   2779                          Name.startswith("fma.vfnmadd.") ||
   2780                          Name.startswith("fma.vfnmsub."))) {
   2781       bool NegMul = Name[6] == 'n';
   2782       bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
   2783       bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
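               // Illustrative examples: "fma.vfnmsub.sd" sets NegMul, NegAcc and
               // IsScalar; "fma.vfmadd.ps" sets none of them.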
   2784 
   2785       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
   2786                        CI->getArgOperand(2) };
   2787 
   2788       if (IsScalar) {
   2789         Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
   2790         Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
   2791         Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
   2792       }
   2793 
   2794       if (NegMul && !IsScalar)
   2795         Ops[0] = Builder.CreateFNeg(Ops[0]);
   2796       if (NegMul && IsScalar)
   2797         Ops[1] = Builder.CreateFNeg(Ops[1]);
   2798       if (NegAcc)
   2799         Ops[2] = Builder.CreateFNeg(Ops[2]);
   2800 
   2801       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
   2802                                                          Intrinsic::fma,
   2803                                                          Ops[0]->getType()),
   2804                                Ops);
   2805 
   2806       if (IsScalar)
   2807         Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
   2808                                           (uint64_t)0);
   2809     } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
   2810       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
   2811                        CI->getArgOperand(2) };
   2812 
   2813       Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
   2814       Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
   2815       Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
   2816 
   2817       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
   2818                                                          Intrinsic::fma,
   2819                                                          Ops[0]->getType()),
   2820                                Ops);
   2821 
   2822       Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
   2823                                         Rep, (uint64_t)0);
   2824     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
   2825                          Name.startswith("avx512.maskz.vfmadd.s") ||
   2826                          Name.startswith("avx512.mask3.vfmadd.s") ||
   2827                          Name.startswith("avx512.mask3.vfmsub.s") ||
   2828                          Name.startswith("avx512.mask3.vfnmsub.s"))) {
   2829       bool IsMask3 = Name[11] == '3';
   2830       bool IsMaskZ = Name[11] == 'z';
    2831       // Drop the "avx512.mask."/"mask3."/"maskz." prefix to simplify indexing below.
   2832       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
   2833       bool NegMul = Name[2] == 'n';
   2834       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
   2835 
   2836       Value *A = CI->getArgOperand(0);
   2837       Value *B = CI->getArgOperand(1);
   2838       Value *C = CI->getArgOperand(2);
   2839 
   2840       if (NegMul && (IsMask3 || IsMaskZ))
   2841         A = Builder.CreateFNeg(A);
   2842       if (NegMul && !(IsMask3 || IsMaskZ))
   2843         B = Builder.CreateFNeg(B);
   2844       if (NegAcc)
   2845         C = Builder.CreateFNeg(C);
   2846 
   2847       A = Builder.CreateExtractElement(A, (uint64_t)0);
   2848       B = Builder.CreateExtractElement(B, (uint64_t)0);
   2849       C = Builder.CreateExtractElement(C, (uint64_t)0);
   2850 
   2851       if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
   2852           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
   2853         Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
   2854 
   2855         Intrinsic::ID IID;
   2856         if (Name.back() == 'd')
   2857           IID = Intrinsic::x86_avx512_vfmadd_f64;
   2858         else
   2859           IID = Intrinsic::x86_avx512_vfmadd_f32;
   2860         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
   2861         Rep = Builder.CreateCall(FMA, Ops);
   2862       } else {
   2863         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
   2864                                                   Intrinsic::fma,
   2865                                                   A->getType());
   2866         Rep = Builder.CreateCall(FMA, { A, B, C });
   2867       }
   2868 
   2869       Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
   2870                         IsMask3 ? C : A;
   2871 
   2872       // For Mask3 with NegAcc, we need to create a new extractelement that
   2873       // avoids the negation above.
   2874       if (NegAcc && IsMask3)
   2875         PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
   2876                                                 (uint64_t)0);
   2877 
   2878       Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
   2879                                 Rep, PassThru);
   2880       Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
   2881                                         Rep, (uint64_t)0);
   2882     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
   2883                          Name.startswith("avx512.mask.vfnmadd.p") ||
   2884                          Name.startswith("avx512.mask.vfnmsub.p") ||
   2885                          Name.startswith("avx512.mask3.vfmadd.p") ||
   2886                          Name.startswith("avx512.mask3.vfmsub.p") ||
   2887                          Name.startswith("avx512.mask3.vfnmsub.p") ||
   2888                          Name.startswith("avx512.maskz.vfmadd.p"))) {
   2889       bool IsMask3 = Name[11] == '3';
   2890       bool IsMaskZ = Name[11] == 'z';
    2891       // Drop the "avx512.mask."/"mask3."/"maskz." prefix to simplify indexing below.
   2892       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
   2893       bool NegMul = Name[2] == 'n';
   2894       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
   2895 
   2896       Value *A = CI->getArgOperand(0);
   2897       Value *B = CI->getArgOperand(1);
   2898       Value *C = CI->getArgOperand(2);
   2899 
   2900       if (NegMul && (IsMask3 || IsMaskZ))
   2901         A = Builder.CreateFNeg(A);
   2902       if (NegMul && !(IsMask3 || IsMaskZ))
   2903         B = Builder.CreateFNeg(B);
   2904       if (NegAcc)
   2905         C = Builder.CreateFNeg(C);
   2906 
   2907       if (CI->getNumArgOperands() == 5 &&
   2908           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
   2909            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
   2910         Intrinsic::ID IID;
    2911         // Check the character before ".512" in the string.
   2912         if (Name[Name.size()-5] == 's')
   2913           IID = Intrinsic::x86_avx512_vfmadd_ps_512;
   2914         else
   2915           IID = Intrinsic::x86_avx512_vfmadd_pd_512;
   2916 
   2917         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
   2918                                  { A, B, C, CI->getArgOperand(4) });
   2919       } else {
   2920         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
   2921                                                   Intrinsic::fma,
   2922                                                   A->getType());
   2923         Rep = Builder.CreateCall(FMA, { A, B, C });
   2924       }
   2925 
   2926       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
   2927                         IsMask3 ? CI->getArgOperand(2) :
   2928                                   CI->getArgOperand(0);
   2929 
   2930       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
   2931     } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
   2932                          Name.startswith("fma.vfmsubadd.p"))) {
   2933       bool IsSubAdd = Name[7] == 's';
   2934       int NumElts = CI->getType()->getVectorNumElements();
   2935 
   2936       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
   2937                        CI->getArgOperand(2) };
   2938 
   2939       Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
   2940                                                 Ops[0]->getType());
   2941       Value *Odd = Builder.CreateCall(FMA, Ops);
   2942       Ops[2] = Builder.CreateFNeg(Ops[2]);
   2943       Value *Even = Builder.CreateCall(FMA, Ops);
   2944 
   2945       if (IsSubAdd)
   2946         std::swap(Even, Odd);
   2947 
   2948       SmallVector<uint32_t, 32> Idxs(NumElts);
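               // Interleave the two FMA results: even destination lanes come from Even
               // and odd lanes from Odd (indices >= NumElts). Illustrative example: for
               // NumElts = 4 the mask is <0, 5, 2, 7>.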
   2949       for (int i = 0; i != NumElts; ++i)
   2950         Idxs[i] = i + (i % 2) * NumElts;
   2951 
   2952       Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
   2953     } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
   2954                          Name.startswith("avx512.mask3.vfmaddsub.p") ||
   2955                          Name.startswith("avx512.maskz.vfmaddsub.p") ||
   2956                          Name.startswith("avx512.mask3.vfmsubadd.p"))) {
   2957       bool IsMask3 = Name[11] == '3';
   2958       bool IsMaskZ = Name[11] == 'z';
    2959       // Drop the "avx512.mask."/"mask3."/"maskz." prefix to simplify indexing below.
   2960       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
   2961       bool IsSubAdd = Name[3] == 's';
   2962       if (CI->getNumArgOperands() == 5 &&
   2963           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
   2964            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
   2965         Intrinsic::ID IID;
    2966         // Check the character before ".512" in the string.
   2967         if (Name[Name.size()-5] == 's')
   2968           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
   2969         else
   2970           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
   2971 
   2972         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
   2973                          CI->getArgOperand(2), CI->getArgOperand(4) };
   2974         if (IsSubAdd)
   2975           Ops[2] = Builder.CreateFNeg(Ops[2]);
   2976 
    2977         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
    2978                                  Ops);
   2980       } else {
   2981         int NumElts = CI->getType()->getVectorNumElements();
   2982 
   2983         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
   2984                          CI->getArgOperand(2) };
   2985 
   2986         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
   2987                                                   Ops[0]->getType());
   2988         Value *Odd = Builder.CreateCall(FMA, Ops);
   2989         Ops[2] = Builder.CreateFNeg(Ops[2]);
   2990         Value *Even = Builder.CreateCall(FMA, Ops);
   2991 
   2992         if (IsSubAdd)
   2993           std::swap(Even, Odd);
   2994 
   2995         SmallVector<uint32_t, 32> Idxs(NumElts);
   2996         for (int i = 0; i != NumElts; ++i)
   2997           Idxs[i] = i + (i % 2) * NumElts;
   2998 
   2999         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
   3000       }
   3001 
   3002       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
   3003                         IsMask3 ? CI->getArgOperand(2) :
   3004                                   CI->getArgOperand(0);
   3005 
   3006       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
   3007     } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
   3008                          Name.startswith("avx512.maskz.pternlog."))) {
   3009       bool ZeroMask = Name[11] == 'z';
   3010       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
   3011       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
   3012       Intrinsic::ID IID;
   3013       if (VecWidth == 128 && EltWidth == 32)
   3014         IID = Intrinsic::x86_avx512_pternlog_d_128;
   3015       else if (VecWidth == 256 && EltWidth == 32)
   3016         IID = Intrinsic::x86_avx512_pternlog_d_256;
   3017       else if (VecWidth == 512 && EltWidth == 32)
   3018         IID = Intrinsic::x86_avx512_pternlog_d_512;
   3019       else if (VecWidth == 128 && EltWidth == 64)
   3020         IID = Intrinsic::x86_avx512_pternlog_q_128;
   3021       else if (VecWidth == 256 && EltWidth == 64)
   3022         IID = Intrinsic::x86_avx512_pternlog_q_256;
   3023       else if (VecWidth == 512 && EltWidth == 64)
   3024         IID = Intrinsic::x86_avx512_pternlog_q_512;
   3025       else
   3026         llvm_unreachable("Unexpected intrinsic");
   3027 
    3028       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
   3029                         CI->getArgOperand(2), CI->getArgOperand(3) };
   3030       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
   3031                                Args);
   3032       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
   3033                                  : CI->getArgOperand(0);
   3034       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
   3035     } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
   3036                          Name.startswith("avx512.maskz.vpmadd52"))) {
   3037       bool ZeroMask = Name[11] == 'z';
   3038       bool High = Name[20] == 'h' || Name[21] == 'h';
   3039       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
   3040       Intrinsic::ID IID;
   3041       if (VecWidth == 128 && !High)
   3042         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
   3043       else if (VecWidth == 256 && !High)
   3044         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
   3045       else if (VecWidth == 512 && !High)
   3046         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
   3047       else if (VecWidth == 128 && High)
   3048         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
   3049       else if (VecWidth == 256 && High)
   3050         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
   3051       else if (VecWidth == 512 && High)
   3052         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
   3053       else
   3054         llvm_unreachable("Unexpected intrinsic");
   3055 
    3056       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
   3057                         CI->getArgOperand(2) };
   3058       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
   3059                                Args);
   3060       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
   3061                                  : CI->getArgOperand(0);
   3062       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
   3063     } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
   3064                          Name.startswith("avx512.mask.vpermt2var.") ||
   3065                          Name.startswith("avx512.maskz.vpermt2var."))) {
   3066       bool ZeroMask = Name[11] == 'z';
   3067       bool IndexForm = Name[17] == 'i';
   3068       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
   3069       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
   3070       bool IsFloat = CI->getType()->isFPOrFPVectorTy();
   3071       Intrinsic::ID IID;
   3072       if (VecWidth == 128 && EltWidth == 32 && IsFloat)
   3073         IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
   3074       else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
   3075         IID = Intrinsic::x86_avx512_vpermi2var_d_128;
   3076       else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
   3077         IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
   3078       else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
   3079         IID = Intrinsic::x86_avx512_vpermi2var_q_128;
   3080       else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
   3081         IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
   3082       else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
   3083         IID = Intrinsic::x86_avx512_vpermi2var_d_256;
   3084       else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
   3085         IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
   3086       else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
   3087         IID = Intrinsic::x86_avx512_vpermi2var_q_256;
   3088       else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
   3089         IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
   3090       else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
   3091         IID = Intrinsic::x86_avx512_vpermi2var_d_512;
   3092       else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
   3093         IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
   3094       else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
   3095         IID = Intrinsic::x86_avx512_vpermi2var_q_512;
   3096       else if (VecWidth == 128 && EltWidth == 16)
   3097         IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
   3098       else if (VecWidth == 256 && EltWidth == 16)
   3099         IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
   3100       else if (VecWidth == 512 && EltWidth == 16)
   3101         IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
   3102       else if (VecWidth == 128 && EltWidth == 8)
   3103         IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
   3104       else if (VecWidth == 256 && EltWidth == 8)
   3105         IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
   3106       else if (VecWidth == 512 && EltWidth == 8)
   3107         IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
   3108       else
   3109         llvm_unreachable("Unexpected intrinsic");
   3110 
    3111       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
   3112                         CI->getArgOperand(2) };
   3113 
   3114       // If this isn't index form we need to swap operand 0 and 1.
   3115       if (!IndexForm)
   3116         std::swap(Args[0], Args[1]);
   3117 
   3118       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
   3119                                Args);
   3120       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
   3121                                  : Builder.CreateBitCast(CI->getArgOperand(1),
   3122                                                          CI->getType());
   3123       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
   3124     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
   3125                          Name.startswith("avx512.maskz.vpdpbusd.") ||
   3126                          Name.startswith("avx512.mask.vpdpbusds.") ||
   3127                          Name.startswith("avx512.maskz.vpdpbusds."))) {
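               // Lower the masked VNNI vpdpbusd/vpdpbusds intrinsics to the unmasked
               // intrinsic plus a select on the mask; the accumulator operand (arg 0)
               // is the pass-through value for the non-zeroing forms.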
   3128       bool ZeroMask = Name[11] == 'z';
   3129       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
   3130       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
   3131       Intrinsic::ID IID;
   3132       if (VecWidth == 128 && !IsSaturating)
   3133         IID = Intrinsic::x86_avx512_vpdpbusd_128;
   3134       else if (VecWidth == 256 && !IsSaturating)
   3135         IID = Intrinsic::x86_avx512_vpdpbusd_256;
   3136       else if (VecWidth == 512 && !IsSaturating)
   3137         IID = Intrinsic::x86_avx512_vpdpbusd_512;
   3138       else if (VecWidth == 128 && IsSaturating)
   3139         IID = Intrinsic::x86_avx512_vpdpbusds_128;
   3140       else if (VecWidth == 256 && IsSaturating)
   3141         IID = Intrinsic::x86_avx512_vpdpbusds_256;
   3142       else if (VecWidth == 512 && IsSaturating)
   3143         IID = Intrinsic::x86_avx512_vpdpbusds_512;
   3144       else
   3145         llvm_unreachable("Unexpected intrinsic");
   3146 
   3147       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
   3148                         CI->getArgOperand(2)  };
   3149       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
   3150                                Args);
   3151       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
   3152                                  : CI->getArgOperand(0);
   3153       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
   3154     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
   3155                          Name.startswith("avx512.maskz.vpdpwssd.") ||
   3156                          Name.startswith("avx512.mask.vpdpwssds.") ||
   3157                          Name.startswith("avx512.maskz.vpdpwssds."))) {
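               // Same lowering for the word variants (vpdpwssd/vpdpwssds).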
   3158       bool ZeroMask = Name[11] == 'z';
   3159       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
   3160       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
   3161       Intrinsic::ID IID;
   3162       if (VecWidth == 128 && !IsSaturating)
   3163         IID = Intrinsic::x86_avx512_vpdpwssd_128;
   3164       else if (VecWidth == 256 && !IsSaturating)
   3165         IID = Intrinsic::x86_avx512_vpdpwssd_256;
   3166       else if (VecWidth == 512 && !IsSaturating)
   3167         IID = Intrinsic::x86_avx512_vpdpwssd_512;
   3168       else if (VecWidth == 128 && IsSaturating)
   3169         IID = Intrinsic::x86_avx512_vpdpwssds_128;
   3170       else if (VecWidth == 256 && IsSaturating)
   3171         IID = Intrinsic::x86_avx512_vpdpwssds_256;
   3172       else if (VecWidth == 512 && IsSaturating)
   3173         IID = Intrinsic::x86_avx512_vpdpwssds_512;
   3174       else
   3175         llvm_unreachable("Unexpected intrinsic");
   3176 
   3177       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
   3178                         CI->getArgOperand(2)  };
   3179       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
   3180                                Args);
   3181       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
   3182                                  : CI->getArgOperand(0);
   3183       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
   3184     } else if (IsX86 && Name.startswith("avx512.mask.") &&
   3185                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
   3186       // Rep will be updated by the call in the condition.
   3187     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
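               // Expand nvvm.abs.{i,ll} to (x >= 0) ? x : -x using an icmp and select.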
   3188       Value *Arg = CI->getArgOperand(0);
   3189       Value *Neg = Builder.CreateNeg(Arg, "neg");
   3190       Value *Cmp = Builder.CreateICmpSGE(
   3191           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
   3192       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
   3193     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
   3194                           Name == "max.ui" || Name == "max.ull")) {
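               // Expand the integer max intrinsics to a compare and select; the .ui
               // and .ull variants use an unsigned comparison.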
   3195       Value *Arg0 = CI->getArgOperand(0);
   3196       Value *Arg1 = CI->getArgOperand(1);
   3197       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
   3198                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
   3199                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
   3200       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
   3201     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
   3202                           Name == "min.ui" || Name == "min.ull")) {
   3203       Value *Arg0 = CI->getArgOperand(0);
   3204       Value *Arg1 = CI->getArgOperand(1);
   3205       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
   3206                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
   3207                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
   3208       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
   3209     } else if (IsNVVM && Name == "clz.ll") {
    3210       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
   3211       Value *Arg = CI->getArgOperand(0);
   3212       Value *Ctlz = Builder.CreateCall(
   3213           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
   3214                                     {Arg->getType()}),
   3215           {Arg, Builder.getFalse()}, "ctlz");
   3216       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
   3217     } else if (IsNVVM && Name == "popc.ll") {
    3218       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
    3219       // i64.
   3220       Value *Arg = CI->getArgOperand(0);
   3221       Value *Popc = Builder.CreateCall(
   3222           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
   3223                                     {Arg->getType()}),
   3224           Arg, "ctpop");
   3225       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
   3226     } else if (IsNVVM && Name == "h2f") {
   3227       Rep = Builder.CreateCall(Intrinsic::getDeclaration(
   3228                                    F->getParent(), Intrinsic::convert_from_fp16,
   3229                                    {Builder.getFloatTy()}),
   3230                                CI->getArgOperand(0), "h2f");
   3231     } else {
   3232       llvm_unreachable("Unknown function for CallInst upgrade.");
   3233     }
   3234 
   3235     if (Rep)
   3236       CI->replaceAllUsesWith(Rep);
   3237     CI->eraseFromParent();
   3238     return;
   3239   }
   3240 
   3241   const auto &DefaultCase = [&NewFn, &CI]() -> void {
   3242     // Handle generic mangling change, but nothing else
   3243     assert(
   3244         (CI->getCalledFunction()->getName() != NewFn->getName()) &&
   3245         "Unknown function for CallInst upgrade and isn't just a name change");
   3246     CI->setCalledFunction(NewFn);
   3247   };
   3248   CallInst *NewCall = nullptr;
   3249   switch (NewFn->getIntrinsicID()) {
   3250   default: {
   3251     DefaultCase();
   3252     return;
   3253   }
   3254 
   3255   case Intrinsic::arm_neon_vld1:
   3256   case Intrinsic::arm_neon_vld2:
   3257   case Intrinsic::arm_neon_vld3:
   3258   case Intrinsic::arm_neon_vld4:
   3259   case Intrinsic::arm_neon_vld2lane:
   3260   case Intrinsic::arm_neon_vld3lane:
   3261   case Intrinsic::arm_neon_vld4lane:
   3262   case Intrinsic::arm_neon_vst1:
   3263   case Intrinsic::arm_neon_vst2:
   3264   case Intrinsic::arm_neon_vst3:
   3265   case Intrinsic::arm_neon_vst4:
   3266   case Intrinsic::arm_neon_vst2lane:
   3267   case Intrinsic::arm_neon_vst3lane:
   3268   case Intrinsic::arm_neon_vst4lane: {
   3269     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
   3270                                  CI->arg_operands().end());
   3271     NewCall = Builder.CreateCall(NewFn, Args);
   3272     break;
   3273   }
   3274 
   3275   case Intrinsic::bitreverse:
   3276     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
   3277     break;
   3278 
   3279   case Intrinsic::ctlz:
   3280   case Intrinsic::cttz:
   3281     assert(CI->getNumArgOperands() == 1 &&
   3282            "Mismatch between function args and call args");
   3283     NewCall =
   3284         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
   3285     break;
   3286 
   3287   case Intrinsic::objectsize: {
   3288     Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
   3289                                    ? Builder.getFalse()
   3290                                    : CI->getArgOperand(2);
   3291     NewCall = Builder.CreateCall(
   3292         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
   3293     break;
   3294   }
   3295 
   3296   case Intrinsic::ctpop:
   3297     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
   3298     break;
   3299 
   3300   case Intrinsic::convert_from_fp16:
   3301     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
   3302     break;
   3303 
   3304   case Intrinsic::dbg_value:
   3305     // Upgrade from the old version that had an extra offset argument.
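             // The old signature was (metadata <value>, i64 <offset>, metadata <var>,
             // metadata <expr>); the new one drops the offset:
             // (metadata <value>, metadata <var>, metadata <expr>).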
   3306     assert(CI->getNumArgOperands() == 4);
   3307     // Drop nonzero offsets instead of attempting to upgrade them.
   3308     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
   3309       if (Offset->isZeroValue()) {
   3310         NewCall = Builder.CreateCall(
   3311             NewFn,
   3312             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
   3313         break;
   3314       }
   3315     CI->eraseFromParent();
   3316     return;
   3317 
   3318   case Intrinsic::x86_xop_vfrcz_ss:
   3319   case Intrinsic::x86_xop_vfrcz_sd:
   3320     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
   3321     break;
   3322 
   3323   case Intrinsic::x86_xop_vpermil2pd:
   3324   case Intrinsic::x86_xop_vpermil2ps:
   3325   case Intrinsic::x86_xop_vpermil2pd_256:
   3326   case Intrinsic::x86_xop_vpermil2ps_256: {
   3327     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
   3328                                  CI->arg_operands().end());
   3329     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
   3330     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
   3331     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
   3332     NewCall = Builder.CreateCall(NewFn, Args);
   3333     break;
   3334   }
   3335 
   3336   case Intrinsic::x86_sse41_ptestc:
   3337   case Intrinsic::x86_sse41_ptestz:
   3338   case Intrinsic::x86_sse41_ptestnzc: {
   3339     // The arguments for these intrinsics used to be v4f32, and changed
   3340     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
   3341     // So, the only thing required is a bitcast for both arguments.
   3342     // First, check the arguments have the old type.
   3343     Value *Arg0 = CI->getArgOperand(0);
   3344     if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
   3345       return;
   3346 
   3347     // Old intrinsic, add bitcasts
   3348     Value *Arg1 = CI->getArgOperand(1);
   3349 
   3350     Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
   3351 
   3352     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
   3353     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
   3354 
   3355     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
   3356     break;
   3357   }
   3358 
   3359   case Intrinsic::x86_sse41_insertps:
   3360   case Intrinsic::x86_sse41_dppd:
   3361   case Intrinsic::x86_sse41_dpps:
   3362   case Intrinsic::x86_sse41_mpsadbw:
   3363   case Intrinsic::x86_avx_dp_ps_256:
   3364   case Intrinsic::x86_avx2_mpsadbw: {
   3365     // Need to truncate the last argument from i32 to i8 -- this argument models
   3366     // an inherently 8-bit immediate operand to these x86 instructions.
   3367     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
   3368                                  CI->arg_operands().end());
   3369 
   3370     // Replace the last argument with a trunc.
   3371     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
   3372     NewCall = Builder.CreateCall(NewFn, Args);
   3373     break;
   3374   }
   3375 
   3376   case Intrinsic::thread_pointer: {
   3377     NewCall = Builder.CreateCall(NewFn, {});
   3378     break;
   3379   }
   3380 
   3381   case Intrinsic::invariant_start:
   3382   case Intrinsic::invariant_end:
   3383   case Intrinsic::masked_load:
   3384   case Intrinsic::masked_store:
   3385   case Intrinsic::masked_gather:
   3386   case Intrinsic::masked_scatter: {
   3387     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
   3388                                  CI->arg_operands().end());
   3389     NewCall = Builder.CreateCall(NewFn, Args);
   3390     break;
   3391   }
   3392 
   3393   case Intrinsic::memcpy:
   3394   case Intrinsic::memmove:
   3395   case Intrinsic::memset: {
   3396     // We have to make sure that the call signature is what we're expecting.
   3397     // We only want to change the old signatures by removing the alignment arg:
   3398     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
   3399     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
   3400     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
   3401     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
   3402     // Note: i8*'s in the above can be any pointer type
   3403     if (CI->getNumArgOperands() != 5) {
   3404       DefaultCase();
   3405       return;
   3406     }
   3407     // Remove alignment argument (3), and add alignment attributes to the
   3408     // dest/src pointers.
   3409     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
   3410                       CI->getArgOperand(2), CI->getArgOperand(4)};
   3411     NewCall = Builder.CreateCall(NewFn, Args);
   3412     auto *MemCI = cast<MemIntrinsic>(NewCall);
   3413     // All mem intrinsics support dest alignment.
   3414     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
   3415     MemCI->setDestAlignment(Align->getZExtValue());
   3416     // Memcpy/Memmove also support source alignment.
   3417     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
   3418       MTI->setSourceAlignment(Align->getZExtValue());
   3419     break;
   3420   }
   3421   }
   3422   assert(NewCall && "Should have either set this variable or returned through "
   3423                     "the default case");
   3424   std::string Name = CI->getName();
   3425   if (!Name.empty()) {
   3426     CI->setName(Name + ".old");
   3427     NewCall->setName(Name);
   3428   }
   3429   CI->replaceAllUsesWith(NewCall);
   3430   CI->eraseFromParent();
   3431 }
   3432 
   3433 void llvm::UpgradeCallsToIntrinsic(Function *F) {
   3434   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
   3435 
   3436   // Check if this function should be upgraded and get the replacement function
   3437   // if there is one.
   3438   Function *NewFn;
   3439   if (UpgradeIntrinsicFunction(F, NewFn)) {
   3440     // Replace all users of the old function with the new function or new
   3441     // instructions. This is not a range loop because the call is deleted.
   3442     for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
   3443       if (CallInst *CI = dyn_cast<CallInst>(*UI++))
   3444         UpgradeIntrinsicCall(CI, NewFn);
   3445 
   3446     // Remove old function, no longer used, from the module.
   3447     F->eraseFromParent();
   3448   }
   3449 }
   3450 
   3451 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
   3452   // Check if the tag uses struct-path aware TBAA format.
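           // Struct-path aware tags start with an MDNode and have at least three
           // operands, e.g. !{!type, !type, i64 0}, while the old scalar format is
           // !{!"name", !parent} or !{!"name", !parent, i64 <const>}.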
   3453   if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
   3454     return &MD;
   3455 
   3456   auto &Context = MD.getContext();
   3457   if (MD.getNumOperands() == 3) {
   3458     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
   3459     MDNode *ScalarType = MDNode::get(Context, Elts);
    3460     // Create an MDNode <ScalarType, ScalarType, offset 0, const>
   3461     Metadata *Elts2[] = {ScalarType, ScalarType,
   3462                          ConstantAsMetadata::get(
   3463                              Constant::getNullValue(Type::getInt64Ty(Context))),
   3464                          MD.getOperand(2)};
   3465     return MDNode::get(Context, Elts2);
   3466   }
    3467   // Create an MDNode <MD, MD, offset 0>
   3468   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
   3469                                     Type::getInt64Ty(Context)))};
   3470   return MDNode::get(Context, Elts);
   3471 }
   3472 
   3473 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
   3474                                       Instruction *&Temp) {
   3475   if (Opc != Instruction::BitCast)
   3476     return nullptr;
   3477 
   3478   Temp = nullptr;
   3479   Type *SrcTy = V->getType();
   3480   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
   3481       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
   3482     LLVMContext &Context = V->getContext();
   3483 
   3484     // We have no information about target data layout, so we assume that
    3485     // the maximum pointer size is 64 bits.
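             // e.g. "bitcast i8 addrspace(1)* %p to i8*" becomes
             //   %1 = ptrtoint i8 addrspace(1)* %p to i64
             //   %2 = inttoptr i64 %1 to i8*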
   3486     Type *MidTy = Type::getInt64Ty(Context);
   3487     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
   3488 
   3489     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
   3490   }
   3491 
   3492   return nullptr;
   3493 }
   3494 
   3495 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
   3496   if (Opc != Instruction::BitCast)
   3497     return nullptr;
   3498 
   3499   Type *SrcTy = C->getType();
   3500   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
   3501       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
   3502     LLVMContext &Context = C->getContext();
   3503 
   3504     // We have no information about target data layout, so we assume that
    3505     // the maximum pointer size is 64 bits.
   3506     Type *MidTy = Type::getInt64Ty(Context);
   3507 
   3508     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
   3509                                      DestTy);
   3510   }
   3511 
   3512   return nullptr;
   3513 }
   3514 
    3515 /// Check the debug info version number; if it is out of date, drop the debug
    3516 /// info. Return true if the module is modified.
   3517 bool llvm::UpgradeDebugInfo(Module &M) {
   3518   unsigned Version = getDebugMetadataVersionFromModule(M);
   3519   if (Version == DEBUG_METADATA_VERSION) {
   3520     bool BrokenDebugInfo = false;
   3521     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
   3522       report_fatal_error("Broken module found, compilation aborted!");
   3523     if (!BrokenDebugInfo)
   3524       // Everything is ok.
   3525       return false;
   3526     else {
   3527       // Diagnose malformed debug info.
   3528       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
   3529       M.getContext().diagnose(Diag);
   3530     }
   3531   }
   3532   bool Modified = StripDebugInfo(M);
   3533   if (Modified && Version != DEBUG_METADATA_VERSION) {
   3534     // Diagnose a version mismatch.
   3535     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
   3536     M.getContext().diagnose(DiagVersion);
   3537   }
   3538   return Modified;
   3539 }
   3540 
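         // The clang.arc.retainAutoreleasedReturnValueMarker string used to separate
         // its two components with '#'; rewrite it to use ';' (e.g. "a#b" -> "a;b").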
   3541 bool llvm::UpgradeRetainReleaseMarker(Module &M) {
   3542   bool Changed = false;
   3543   NamedMDNode *ModRetainReleaseMarker =
   3544       M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker");
   3545   if (ModRetainReleaseMarker) {
   3546     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
   3547     if (Op) {
   3548       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
   3549       if (ID) {
   3550         SmallVector<StringRef, 4> ValueComp;
   3551         ID->getString().split(ValueComp, "#");
   3552         if (ValueComp.size() == 2) {
   3553           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
   3554           Metadata *Ops[1] = {MDString::get(M.getContext(), NewValue)};
   3555           ModRetainReleaseMarker->setOperand(0,
   3556                                              MDNode::get(M.getContext(), Ops));
   3557           Changed = true;
   3558         }
   3559       }
   3560     }
   3561   }
   3562   return Changed;
   3563 }
   3564 
   3565 bool llvm::UpgradeModuleFlags(Module &M) {
   3566   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
   3567   if (!ModFlags)
   3568     return false;
   3569 
   3570   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
   3571   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
   3572     MDNode *Op = ModFlags->getOperand(I);
   3573     if (Op->getNumOperands() != 3)
   3574       continue;
   3575     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
   3576     if (!ID)
   3577       continue;
   3578     if (ID->getString() == "Objective-C Image Info Version")
   3579       HasObjCFlag = true;
   3580     if (ID->getString() == "Objective-C Class Properties")
   3581       HasClassProperties = true;
   3582     // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
    3583     // fields was Error and is now Max.
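             // e.g. a !{i32 Error, !"PIC Level", i32 2} flag is rewritten to
             // !{i32 Max, !"PIC Level", i32 2}.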
   3584     if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
   3585       if (auto *Behavior =
   3586               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
   3587         if (Behavior->getLimitedValue() == Module::Error) {
   3588           Type *Int32Ty = Type::getInt32Ty(M.getContext());
   3589           Metadata *Ops[3] = {
   3590               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
   3591               MDString::get(M.getContext(), ID->getString()),
   3592               Op->getOperand(2)};
   3593           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
   3594           Changed = true;
   3595         }
   3596       }
   3597     }
    3598     // Upgrade the Objective-C Image Info Section. Remove the whitespace in
    3599     // the section name so that llvm-lto will not complain about mismatching
    3600     // module flags that are functionally the same.
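             // e.g. "__DATA, __objc_imageinfo, regular" becomes
             // "__DATA,__objc_imageinfo,regular".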
   3601     if (ID->getString() == "Objective-C Image Info Section") {
   3602       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
   3603         SmallVector<StringRef, 4> ValueComp;
   3604         Value->getString().split(ValueComp, " ");
   3605         if (ValueComp.size() != 1) {
   3606           std::string NewValue;
   3607           for (auto &S : ValueComp)
   3608             NewValue += S.str();
   3609           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
   3610                               MDString::get(M.getContext(), NewValue)};
   3611           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
   3612           Changed = true;
   3613         }
   3614       }
   3615     }
   3616   }
   3617 
    3618   // "Objective-C Class Properties" was recently added for Objective-C. We
    3619   // upgrade ObjC bitcode to contain an "Objective-C Class Properties" module
    3620   // flag of value 0, so we can correctly downgrade this flag when trying to
    3621   // link ObjC bitcode without this module flag with ObjC bitcode that has
    3622   // this module flag.
   3623   if (HasObjCFlag && !HasClassProperties) {
   3624     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
   3625                     (uint32_t)0);
   3626     Changed = true;
   3627   }
   3628 
   3629   return Changed;
   3630 }
   3631 
   3632 void llvm::UpgradeSectionAttributes(Module &M) {
   3633   auto TrimSpaces = [](StringRef Section) -> std::string {
   3634     SmallVector<StringRef, 5> Components;
   3635     Section.split(Components, ',');
   3636 
   3637     SmallString<32> Buffer;
   3638     raw_svector_ostream OS(Buffer);
   3639 
   3640     for (auto Component : Components)
   3641       OS << ',' << Component.trim();
   3642 
   3643     return OS.str().substr(1);
   3644   };
   3645 
   3646   for (auto &GV : M.globals()) {
   3647     if (!GV.hasSection())
   3648       continue;
   3649 
   3650     StringRef Section = GV.getSection();
   3651 
   3652     if (!Section.startswith("__DATA, __objc_catlist"))
   3653       continue;
   3654 
   3655     // __DATA, __objc_catlist, regular, no_dead_strip
   3656     // __DATA,__objc_catlist,regular,no_dead_strip
   3657     GV.setSection(TrimSpaces(Section));
   3658   }
   3659 }
   3660 
   3661 static bool isOldLoopArgument(Metadata *MD) {
   3662   auto *T = dyn_cast_or_null<MDTuple>(MD);
   3663   if (!T)
   3664     return false;
   3665   if (T->getNumOperands() < 1)
   3666     return false;
   3667   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
   3668   if (!S)
   3669     return false;
   3670   return S->getString().startswith("llvm.vectorizer.");
   3671 }
   3672 
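         // Map an old "llvm.vectorizer.*" hint to its new name, e.g.
         // "llvm.vectorizer.width" -> "llvm.loop.vectorize.width" and
         // "llvm.vectorizer.unroll" -> "llvm.loop.interleave.count".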
   3673 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
   3674   StringRef OldPrefix = "llvm.vectorizer.";
   3675   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
   3676 
   3677   if (OldTag == "llvm.vectorizer.unroll")
   3678     return MDString::get(C, "llvm.loop.interleave.count");
   3679 
   3680   return MDString::get(
   3681       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
   3682              .str());
   3683 }
   3684 
   3685 static Metadata *upgradeLoopArgument(Metadata *MD) {
   3686   auto *T = dyn_cast_or_null<MDTuple>(MD);
   3687   if (!T)
   3688     return MD;
   3689   if (T->getNumOperands() < 1)
   3690     return MD;
   3691   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
   3692   if (!OldTag)
   3693     return MD;
   3694   if (!OldTag->getString().startswith("llvm.vectorizer."))
   3695     return MD;
   3696 
   3697   // This has an old tag.  Upgrade it.
   3698   SmallVector<Metadata *, 8> Ops;
   3699   Ops.reserve(T->getNumOperands());
   3700   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
   3701   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
   3702     Ops.push_back(T->getOperand(I));
   3703 
   3704   return MDTuple::get(T->getContext(), Ops);
   3705 }
   3706 
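         // Rewrite any "llvm.vectorizer.*" operands of a loop metadata node, e.g. an
         // operand !{!"llvm.vectorizer.enable", i1 1} becomes
         // !{!"llvm.loop.vectorize.enable", i1 1}.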
   3707 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
   3708   auto *T = dyn_cast<MDTuple>(&N);
   3709   if (!T)
   3710     return &N;
   3711 
   3712   if (none_of(T->operands(), isOldLoopArgument))
   3713     return &N;
   3714 
   3715   SmallVector<Metadata *, 8> Ops;
   3716   Ops.reserve(T->getNumOperands());
   3717   for (Metadata *MD : T->operands())
   3718     Ops.push_back(upgradeLoopArgument(MD));
   3719 
   3720   return MDTuple::get(T->getContext(), Ops);
   3721 }
   3722