//==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

// Table of contents:
//     (0) Definitions
//     (1) Immediates
//     (2) Type casts
//     (3) Extend/truncate
//     (4) Logical
//     (5) Compare
//     (6) Select
//     (7) Insert/extract
//     (8) Shift/permute
//     (9) Arithmetic/bitwise
//    (10) Bit
//    (11) PIC
//    (12) Load
//    (13) Store
//    (14) Memop
//    (15) Call
//    (16) Branch
//    (17) Misc

// Guidelines (in no particular order):
// 1. Avoid relying on pattern ordering to give preference to one pattern
//    over another; prefer using AddedComplexity instead (see the
//    illustrative example after this list). The reason for this is to
//    avoid unintended consequences (caused by altering the order) when
//    making changes. The current order of patterns in this file obviously
//    does play some role, but none of the ordering was deliberately chosen
//    (other than to create a logical structure of this file). When making
//    changes, adding AddedComplexity to existing patterns may be needed.
// 2. Maintain the logical structure of the file; try to put new patterns
//    in designated sections.
// 3. Do not use the A2_combinew instruction directly; use the Combinew
//    fragment instead. It uses REG_SEQUENCE, which is more amenable to
//    optimizations.
// 4. Most selection macros are based on PatFrags. For DAGs that involve
//    SDNodes, use pf1/pf2 to convert them to PatFrags. Use common frags
//    whenever possible (see the Definitions section). When adding a new
//    macro, try to make it general to enable reuse across sections.
// 5. Compound instructions (e.g. Rx+Rs*Rt) are generated under the condition
//    that the nested operation has only one use. Keeping it separate in the
//    case of multiple uses avoids duplicating the (processor) work.
// 6. The v4 vector instructions (64-bit) are treated as core instructions,
//    for example, A2_vaddh is in the "arithmetic" section with A2_add.
// 7. When adding a pattern for an instruction with a constant-extendable
//    operand, allow all possible kinds of inputs for the immediate value
//    (see AnyImm/anyimm and their variants in the Definitions section).
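
// As an illustrative example of guidelines 1, 4, and 5 (this is not an
// additional pattern, just a reference to one defined in the Shift/permute
// section), the accumulating shift below is written with the common frags
// Add and Sra, wraps the nested shift in Su<...> so that it only matches
// when the shift has a single use, and is given preference with
// AddedComplexity rather than by its position in the file:
//
//   let AddedComplexity = 100 in
//   def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32, I32>;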


// --(0) Definitions -----------------------------------------------------
//

// This complex pattern exists only to create a machine instruction operand
// of type "frame index". There doesn't seem to be a way to do that directly
// in the patterns.
def AddrFI: ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>;

// These complex patterns are not strictly necessary, since global address
// folding will happen during DAG combining. To distinguish between GA
// and GP, pat frags with HexagonCONST32 and HexagonCONST32_GP can be used.
def AddrGA: ComplexPattern<i32, 1, "SelectAddrGA", [], []>;
def AddrGP: ComplexPattern<i32, 1, "SelectAddrGP", [], []>;
def AnyImm: ComplexPattern<i32, 1, "SelectAnyImm", [], []>;
def AnyInt: ComplexPattern<i32, 1, "SelectAnyInt", [], []>;

// Global address or a constant being a multiple of 2^n.
def AnyImm0: ComplexPattern<i32, 1, "SelectAnyImm0", [], []>;
def AnyImm1: ComplexPattern<i32, 1, "SelectAnyImm1", [], []>;
def AnyImm2: ComplexPattern<i32, 1, "SelectAnyImm2", [], []>;
def AnyImm3: ComplexPattern<i32, 1, "SelectAnyImm3", [], []>;


// Type helper frags.
def V2I1:   PatLeaf<(v2i1    PredRegs:$R)>;
def V4I1:   PatLeaf<(v4i1    PredRegs:$R)>;
def V8I1:   PatLeaf<(v8i1    PredRegs:$R)>;
def V4I8:   PatLeaf<(v4i8    IntRegs:$R)>;
def V2I16:  PatLeaf<(v2i16   IntRegs:$R)>;

def V8I8:   PatLeaf<(v8i8    DoubleRegs:$R)>;
def V4I16:  PatLeaf<(v4i16   DoubleRegs:$R)>;
def V2I32:  PatLeaf<(v2i32   DoubleRegs:$R)>;

def HQ8:    PatLeaf<(VecQ8   HvxQR:$R)>;
def HQ16:   PatLeaf<(VecQ16  HvxQR:$R)>;
def HQ32:   PatLeaf<(VecQ32  HvxQR:$R)>;

def HVI8:   PatLeaf<(VecI8   HvxVR:$R)>;
def HVI16:  PatLeaf<(VecI16  HvxVR:$R)>;
def HVI32:  PatLeaf<(VecI32  HvxVR:$R)>;

def HWI8:   PatLeaf<(VecPI8  HvxWR:$R)>;
def HWI16:  PatLeaf<(VecPI16 HvxWR:$R)>;
def HWI32:  PatLeaf<(VecPI32 HvxWR:$R)>;

def SDTVecVecIntOp:
  SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>,
                       SDTCisVT<3,i32>]>;

def HexagonVALIGN:     SDNode<"HexagonISD::VALIGN",     SDTVecVecIntOp>;
def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>;

def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru),
                    (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>;
def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>;

// Pattern fragments to extract the low and high subregisters from a
// 64-bit value.
def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>;
def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>;

def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{
  return isOrEquivalentToAdd(N);
}]>;

def IsPow2_32: PatLeaf<(i32 imm), [{
  uint32_t V = N->getZExtValue();
  return isPowerOf2_32(V);
}]>;

def IsPow2_64: PatLeaf<(i64 imm), [{
  uint64_t V = N->getZExtValue();
  return isPowerOf2_64(V);
}]>;

def IsNPow2_32: PatLeaf<(i32 imm), [{
  uint32_t NV = ~N->getZExtValue();
  return isPowerOf2_32(NV);
}]>;

def IsPow2_64L: PatLeaf<(i64 imm), [{
  uint64_t V = N->getZExtValue();
  return isPowerOf2_64(V) && Log2_64(V) < 32;
}]>;

def IsPow2_64H: PatLeaf<(i64 imm), [{
  uint64_t V = N->getZExtValue();
  return isPowerOf2_64(V) && Log2_64(V) >= 32;
}]>;

def IsNPow2_64L: PatLeaf<(i64 imm), [{
  uint64_t NV = ~N->getZExtValue();
  return isPowerOf2_64(NV) && Log2_64(NV) < 32;
}]>;

def IsNPow2_64H: PatLeaf<(i64 imm), [{
  uint64_t NV = ~N->getZExtValue();
  return isPowerOf2_64(NV) && Log2_64(NV) >= 32;
}]>;

class IsUGT<int Width, int Arg>: PatLeaf<(i32 imm),
  "uint64_t V = N->getZExtValue();" #
  "return isUInt<" # Width # ">(V) && V > " # Arg # ";"
>;

def SDEC1: SDNodeXForm<imm, [{
  int32_t V = N->getSExtValue();
  return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32);
}]>;

def UDEC1: SDNodeXForm<imm, [{
  uint32_t V = N->getZExtValue();
  assert(V >= 1);
  return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32);
}]>;

def UDEC32: SDNodeXForm<imm, [{
  uint32_t V = N->getZExtValue();
  assert(V >= 32);
  return CurDAG->getTargetConstant(V-32, SDLoc(N), MVT::i32);
}]>;

def Log2_32: SDNodeXForm<imm, [{
  uint32_t V = N->getZExtValue();
  return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
}]>;

def Log2_64: SDNodeXForm<imm, [{
  uint64_t V = N->getZExtValue();
  return CurDAG->getTargetConstant(Log2_64(V), SDLoc(N), MVT::i32);
}]>;

def LogN2_32: SDNodeXForm<imm, [{
  uint32_t NV = ~N->getZExtValue();
  return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32);
}]>;

def LogN2_64: SDNodeXForm<imm, [{
  uint64_t NV = ~N->getZExtValue();
  return CurDAG->getTargetConstant(Log2_64(NV), SDLoc(N), MVT::i32);
}]>;

def NegImm8: SDNodeXForm<imm, [{
  int8_t NV = -N->getSExtValue();
  return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32);
}]>;

def NegImm16: SDNodeXForm<imm, [{
  int16_t NV = -N->getSExtValue();
  return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32);
}]>;

def NegImm32: SDNodeXForm<imm, [{
  int32_t NV = -N->getSExtValue();
  return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32);
}]>;


// Helpers for type promotions/contractions.
def I1toI32:  OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>;
def I32toI1:  OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>;
def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>;
def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>;

def Combinew: OutPatFrag<(ops node:$Rs, node:$Rt),
  (REG_SEQUENCE DoubleRegs, $Rs, isub_hi, $Rt, isub_lo)>;
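
// Illustrative note for guideline 3 (this refers to an existing pattern in
// the Extend/truncate section; it is not an additional definition): a 64-bit
// value built from two 32-bit halves is written with the Combinew fragment,
// which expands to a REG_SEQUENCE rather than an A2_combinew instruction,
// e.g.
//
//   def: Pat<(i64 (sext I1:$Pu)),
//            (Combinew (C2_muxii PredRegs:$Pu, -1, 0),
//                      (C2_muxii PredRegs:$Pu, -1, 0))>;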

def addrga: PatLeaf<(i32 AddrGA:$Addr)>;
def addrgp: PatLeaf<(i32 AddrGP:$Addr)>;
def anyimm: PatLeaf<(i32 AnyImm:$Imm)>;
def anyint: PatLeaf<(i32 AnyInt:$Imm)>;

// Global address or an aligned constant.
def anyimm0: PatLeaf<(i32 AnyImm0:$Addr)>;
def anyimm1: PatLeaf<(i32 AnyImm1:$Addr)>;
def anyimm2: PatLeaf<(i32 AnyImm2:$Addr)>;
def anyimm3: PatLeaf<(i32 AnyImm3:$Addr)>;

def f32ImmPred : PatLeaf<(f32 fpimm:$F)>;
def f64ImmPred : PatLeaf<(f64 fpimm:$F)>;

// This complex pattern is really only to detect various forms of
// sign-extension i32->i64. The selected value will be of type i64
// whose low word is the value being extended. The high word is
// unspecified.
def Usxtw:  ComplexPattern<i64, 1, "DetectUseSxtw", [], []>;

def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>;
def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>;
def Sext64: PatLeaf<(i64 Usxtw:$Rs)>;

def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
         (PS_fi (i32 AddrFI:$Rs), imm:$off)>;


// Converters from unary/binary SDNode to PatFrag.
class pf1<SDNode Op> : PatFrag<(ops node:$a), (Op node:$a)>;
class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>;

class Not2<PatFrag P>
  : PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>;

class Su<PatFrag Op>
  : PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }],
            Op.OperandTransform>;
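
// For example (these are actual uses from the Logical section below, shown
// here only for illustration): Not2<Or> matches (or A, (not B)), and Su<And>
// matches an AND whose result has a single use, so
//
//   defm: BoolOpR_RR_pat<C2_orn,    Not2<Or>>;
//   defm: BoolAccRRR_pat<C4_or_and, Or, Su<And>>;
//
// select "or(Ps, !Pt)" and "or(Ps, and(Pt, Pu))", respectively.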

// Main selection macros.

class OpR_R_pat<InstHexagon MI, PatFrag Op, ValueType ResVT, PatFrag RegPred>
  : Pat<(ResVT (Op RegPred:$Rs)), (MI RegPred:$Rs)>;

class OpR_RI_pat<InstHexagon MI, PatFrag Op, ValueType ResType,
                 PatFrag RegPred, PatFrag ImmPred>
  : Pat<(ResType (Op RegPred:$Rs, ImmPred:$I)),
        (MI RegPred:$Rs, imm:$I)>;

class OpR_RR_pat<InstHexagon MI, PatFrag Op, ValueType ResType,
                 PatFrag RsPred, PatFrag RtPred = RsPred>
  : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
        (MI RsPred:$Rs, RtPred:$Rt)>;

class AccRRI_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op,
                 PatFrag RegPred, PatFrag ImmPred>
  : Pat<(AccOp RegPred:$Rx, (Op RegPred:$Rs, ImmPred:$I)),
        (MI RegPred:$Rx, RegPred:$Rs, imm:$I)>;

class AccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op,
                 PatFrag RxPred, PatFrag RsPred, PatFrag RtPred>
  : Pat<(AccOp RxPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)),
        (MI RxPred:$Rx, RsPred:$Rs, RtPred:$Rt)>;

multiclass SelMinMax_pats<PatFrag CmpOp, PatFrag Val,
                          InstHexagon InstA, InstHexagon InstB> {
  def: Pat<(select (i1 (CmpOp Val:$A, Val:$B)), Val:$A, Val:$B),
           (InstA Val:$A, Val:$B)>;
  def: Pat<(select (i1 (CmpOp Val:$A, Val:$B)), Val:$B, Val:$A),
           (InstB Val:$A, Val:$B)>;
}
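
// As an illustration of how these macros expand, the defm
//
//   defm: SelMinMax_pats<setgt, I32, A2_max, A2_min>;
//
// (used in the Select section below) produces the two patterns
//   (select (i1 (setgt I32:$A, I32:$B)), I32:$A, I32:$B) -> (A2_max $A, $B)
//   (select (i1 (setgt I32:$A, I32:$B)), I32:$B, I32:$A) -> (A2_min $A, $B)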


// Frags for commonly used SDNodes.
def Add: pf2<add>;    def And: pf2<and>;    def Sra: pf2<sra>;
def Sub: pf2<sub>;    def Or:  pf2<or>;     def Srl: pf2<srl>;
def Mul: pf2<mul>;    def Xor: pf2<xor>;    def Shl: pf2<shl>;

def Rol: pf2<rotl>;

// --(1) Immediate -------------------------------------------------------
//

def SDTHexagonCONST32
  : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisPtrTy<0>]>;

def HexagonJT:          SDNode<"HexagonISD::JT",          SDTIntUnaryOp>;
def HexagonCP:          SDNode<"HexagonISD::CP",          SDTIntUnaryOp>;
def HexagonCONST32:     SDNode<"HexagonISD::CONST32",     SDTHexagonCONST32>;
def HexagonCONST32_GP:  SDNode<"HexagonISD::CONST32_GP",  SDTHexagonCONST32>;

def TruncI64ToI32: SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>;
def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi (TruncI64ToI32 $s8))>;

def: Pat<(HexagonCONST32    tglobaltlsaddr:$A), (A2_tfrsi imm:$A)>;
def: Pat<(HexagonCONST32    bbl:$A),            (A2_tfrsi imm:$A)>;
def: Pat<(HexagonCONST32    tglobaladdr:$A),    (A2_tfrsi imm:$A)>;
def: Pat<(HexagonCONST32_GP tblockaddress:$A),  (A2_tfrsi imm:$A)>;
def: Pat<(HexagonCONST32_GP tglobaladdr:$A),    (A2_tfrsi imm:$A)>;
def: Pat<(HexagonJT         tjumptable:$A),     (A2_tfrsi imm:$A)>;
def: Pat<(HexagonCP         tconstpool:$A),     (A2_tfrsi imm:$A)>;
// The HVX load patterns also match CP directly. Make sure that if
// the selection of this opcode changes, it's updated in all places.

def: Pat<(i1 0),        (PS_false)>;
def: Pat<(i1 1),        (PS_true)>;
def: Pat<(i64 imm:$v),  (CONST64 imm:$v)>;

def ftoi : SDNodeXForm<fpimm, [{
  APInt I = N->getValueAPF().bitcastToAPInt();
  return CurDAG->getTargetConstant(I.getZExtValue(), SDLoc(N),
                                   MVT::getIntegerVT(I.getBitWidth()));
}]>;

def: Pat<(f32ImmPred:$f), (A2_tfrsi (ftoi $f))>;
def: Pat<(f64ImmPred:$f), (CONST64  (ftoi $f))>;

def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>;

// --(2) Type cast -------------------------------------------------------
//

let Predicates = [HasV5] in {
  def: OpR_R_pat<F2_conv_sf2df,      pf1<fpextend>,   f64, F32>;
  def: OpR_R_pat<F2_conv_df2sf,      pf1<fpround>,    f32, F64>;

  def: OpR_R_pat<F2_conv_w2sf,       pf1<sint_to_fp>, f32, I32>;
  def: OpR_R_pat<F2_conv_d2sf,       pf1<sint_to_fp>, f32, I64>;
  def: OpR_R_pat<F2_conv_w2df,       pf1<sint_to_fp>, f64, I32>;
  def: OpR_R_pat<F2_conv_d2df,       pf1<sint_to_fp>, f64, I64>;

  def: OpR_R_pat<F2_conv_uw2sf,      pf1<uint_to_fp>, f32, I32>;
  def: OpR_R_pat<F2_conv_ud2sf,      pf1<uint_to_fp>, f32, I64>;
  def: OpR_R_pat<F2_conv_uw2df,      pf1<uint_to_fp>, f64, I32>;
  def: OpR_R_pat<F2_conv_ud2df,      pf1<uint_to_fp>, f64, I64>;

  def: OpR_R_pat<F2_conv_sf2w_chop,  pf1<fp_to_sint>, i32, F32>;
  def: OpR_R_pat<F2_conv_df2w_chop,  pf1<fp_to_sint>, i32, F64>;
  def: OpR_R_pat<F2_conv_sf2d_chop,  pf1<fp_to_sint>, i64, F32>;
  def: OpR_R_pat<F2_conv_df2d_chop,  pf1<fp_to_sint>, i64, F64>;

  def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>;
  def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
  def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
  def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
}

// Bitcast is different from [fp|sint|uint]_to_[sint|uint|fp].
let Predicates = [HasV5] in {
  def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
  def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
  def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
  def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
}

multiclass Cast_pat<ValueType Ta, ValueType Tb, RegisterClass RC> {
  def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>;
  def: Pat<(Ta (bitconvert (Tb RC:$Rs))), (Ta RC:$Rs)>;
}

// Bit convert vector types to integers.
defm: Cast_pat<v4i8,  i32, IntRegs>;
defm: Cast_pat<v2i16, i32, IntRegs>;
defm: Cast_pat<v8i8,  i64, DoubleRegs>;
defm: Cast_pat<v4i16, i64, DoubleRegs>;
defm: Cast_pat<v2i32, i64, DoubleRegs>;


// --(3) Extend/truncate -------------------------------------------------
//

def: Pat<(sext_inreg I32:$Rs, i8),  (A2_sxtb I32:$Rs)>;
def: Pat<(sext_inreg I32:$Rs, i16), (A2_sxth I32:$Rs)>;
def: Pat<(sext_inreg I64:$Rs, i32), (A2_sxtw (LoReg $Rs))>;
def: Pat<(sext_inreg I64:$Rs, i16), (A2_sxtw (A2_sxth (LoReg $Rs)))>;
def: Pat<(sext_inreg I64:$Rs, i8),  (A2_sxtw (A2_sxtb (LoReg $Rs)))>;

def: Pat<(i64 (sext I1:$Pu)),
         (Combinew (C2_muxii PredRegs:$Pu, -1, 0),
                   (C2_muxii PredRegs:$Pu, -1, 0))>;

def: Pat<(i32   (sext I1:$Pu)),   (C2_muxii I1:$Pu, -1, 0)>;
def: Pat<(i32   (zext I1:$Pu)),   (C2_muxii I1:$Pu, 1, 0)>;
def: Pat<(i64   (zext I1:$Pu)),   (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>;
def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
def: Pat<(v4i8  (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>;
def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
def: Pat<(v8i8  (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>;

def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>;
def: Pat<(Zext64 I32:$Rs),     (ToZext64 $Rs)>;
def: Pat<(Aext64 I32:$Rs),     (ToZext64 $Rs)>;

def: Pat<(i32 (trunc I64:$Rs)), (LoReg $Rs)>;
def: Pat<(i1 (trunc I64:$Rs)),  (C2_tfrrp (LoReg $Rs))>;

let AddedComplexity = 20 in {
  def: Pat<(and I32:$Rs, 255),   (A2_zxtb I32:$Rs)>;
  def: Pat<(and I32:$Rs, 65535), (A2_zxth I32:$Rs)>;
}

def: Pat<(i32 (anyext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
def: Pat<(i64 (anyext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;

def Vsplatpi: OutPatFrag<(ops node:$V),
                         (Combinew (A2_tfrsi $V), (A2_tfrsi $V))>;
def: Pat<(v8i8 (zext V8I1:$Pu)),
         (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>;
def: Pat<(v4i16 (zext V4I1:$Pu)),
         (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>;
def: Pat<(v2i32 (zext V2I1:$Pu)),
         (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>;

def: Pat<(v4i8 (zext V4I1:$Pu)),
         (A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>;
def: Pat<(v2i16 (zext V2I1:$Pu)),
         (A2_andir (LoReg (C2_mask V2I1:$Pu)), (i32 0x00010001))>;

def: Pat<(v4i16 (zext   V4I8:$Rs)),  (S2_vzxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (zext   V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
def: Pat<(v4i16 (anyext V4I8:$Rs)),  (S2_vzxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
def: Pat<(v4i16 (sext   V4I8:$Rs)),  (S2_vsxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (sext   V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;

def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
         (Combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;

def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
         (Combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;

// Truncate: from vector B copy all 'E'ven 'B'yte elements:
// A[0] = B[0];  A[1] = B[2];  A[2] = B[4];  A[3] = B[6];
def: Pat<(v4i8 (trunc V4I16:$Rs)),
         (S2_vtrunehb V4I16:$Rs)>;

// Truncate: from vector B copy all 'O'dd 'B'yte elements:
// A[0] = B[1];  A[1] = B[3];  A[2] = B[5];  A[3] = B[7];
// S2_vtrunohb

// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
// A[0] = B[0];  A[1] = B[2];  A[2] = C[0];  A[3] = C[2];
// S2_vtruneh

def: Pat<(v2i16 (trunc V2I32:$Rs)),
         (A2_combine_ll (HiReg $Rs), (LoReg $Rs))>;


// --(4) Logical ---------------------------------------------------------
//

def: Pat<(not I1:$Ps),      (C2_not I1:$Ps)>;
def: Pat<(not V8I1:$Ps),    (C2_not V8I1:$Ps)>;
def: Pat<(add I1:$Ps, -1),  (C2_not I1:$Ps)>;

multiclass BoolOpR_RR_pat<InstHexagon MI, PatFrag Op> {
  def: OpR_RR_pat<MI, Op,   i1,   I1>;
  def: OpR_RR_pat<MI, Op, v2i1, V2I1>;
  def: OpR_RR_pat<MI, Op, v4i1, V4I1>;
  def: OpR_RR_pat<MI, Op, v8i1, V8I1>;
}

multiclass BoolAccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op> {
  def: AccRRR_pat<MI, AccOp, Op,   I1,   I1,   I1>;
  def: AccRRR_pat<MI, AccOp, Op, V2I1, V2I1, V2I1>;
  def: AccRRR_pat<MI, AccOp, Op, V4I1, V4I1, V4I1>;
  def: AccRRR_pat<MI, AccOp, Op, V8I1, V8I1, V8I1>;
}

defm: BoolOpR_RR_pat<C2_and,   And>;
defm: BoolOpR_RR_pat<C2_or,    Or>;
defm: BoolOpR_RR_pat<C2_xor,   Xor>;
defm: BoolOpR_RR_pat<C2_andn,  Not2<And>>;
defm: BoolOpR_RR_pat<C2_orn,   Not2<Or>>;

// op(Ps, op(Pt, Pu))
defm: BoolAccRRR_pat<C4_and_and,   And, Su<And>>;
defm: BoolAccRRR_pat<C4_and_or,    And, Su<Or>>;
defm: BoolAccRRR_pat<C4_or_and,    Or,  Su<And>>;
defm: BoolAccRRR_pat<C4_or_or,     Or,  Su<Or>>;

// op(Ps, op(Pt, ~Pu))
defm: BoolAccRRR_pat<C4_and_andn,  And, Su<Not2<And>>>;
defm: BoolAccRRR_pat<C4_and_orn,   And, Su<Not2<Or>>>;
defm: BoolAccRRR_pat<C4_or_andn,   Or,  Su<Not2<And>>>;
defm: BoolAccRRR_pat<C4_or_orn,    Or,  Su<Not2<Or>>>;


// --(5) Compare ---------------------------------------------------------
//

// Avoid negated comparisons, i.e. those of the form "Pd = !cmp(...)".
// These cannot form compounds (e.g. J4_cmpeqi_tp0_jump_nt).

def: OpR_RI_pat<C2_cmpeqi,    seteq,          i1, I32,  anyimm>;
def: OpR_RI_pat<C2_cmpgti,    setgt,          i1, I32,  anyimm>;
def: OpR_RI_pat<C2_cmpgtui,   setugt,         i1, I32,  anyimm>;

def: Pat<(i1 (setge I32:$Rs, s32_0ImmPred:$s10)),
         (C2_cmpgti I32:$Rs, (SDEC1 imm:$s10))>;
def: Pat<(i1 (setuge I32:$Rs, u32_0ImmPred:$u9)),
         (C2_cmpgtui I32:$Rs, (UDEC1 imm:$u9))>;

def: Pat<(i1 (setlt I32:$Rs, s32_0ImmPred:$s10)),
         (C2_not (C2_cmpgti I32:$Rs, (SDEC1 imm:$s10)))>;
def: Pat<(i1 (setult I32:$Rs, u32_0ImmPred:$u9)),
         (C2_not (C2_cmpgtui I32:$Rs, (UDEC1 imm:$u9)))>;

// PatFrag to convert the usual comparison PatFrags (e.g. setlt) to ones
// that reverse the order of the operands.
class RevCmp<PatFrag F>
  : PatFrag<(ops node:$rhs, node:$lhs), !head(F.Fragments), F.PredicateCode,
            F.OperandTransform>;

def: OpR_RR_pat<C2_cmpeq,     seteq,          i1,   I32>;
def: OpR_RR_pat<C2_cmpgt,     setgt,          i1,   I32>;
def: OpR_RR_pat<C2_cmpgtu,    setugt,         i1,   I32>;
def: OpR_RR_pat<C2_cmpgt,     RevCmp<setlt>,  i1,   I32>;
def: OpR_RR_pat<C2_cmpgtu,    RevCmp<setult>, i1,   I32>;
def: OpR_RR_pat<C2_cmpeqp,    seteq,          i1,   I64>;
def: OpR_RR_pat<C2_cmpgtp,    setgt,          i1,   I64>;
def: OpR_RR_pat<C2_cmpgtup,   setugt,         i1,   I64>;
def: OpR_RR_pat<C2_cmpgtp,    RevCmp<setlt>,  i1,   I64>;
def: OpR_RR_pat<C2_cmpgtup,   RevCmp<setult>, i1,   I64>;
def: OpR_RR_pat<A2_vcmpbeq,   seteq,          i1,   V8I8>;
def: OpR_RR_pat<A2_vcmpbeq,   seteq,          v8i1, V8I8>;
def: OpR_RR_pat<A4_vcmpbgt,   RevCmp<setlt>,  i1,   V8I8>;
def: OpR_RR_pat<A4_vcmpbgt,   RevCmp<setlt>,  v8i1, V8I8>;
def: OpR_RR_pat<A4_vcmpbgt,   setgt,          i1,   V8I8>;
def: OpR_RR_pat<A4_vcmpbgt,   setgt,          v8i1, V8I8>;
def: OpR_RR_pat<A2_vcmpbgtu,  RevCmp<setult>, i1,   V8I8>;
def: OpR_RR_pat<A2_vcmpbgtu,  RevCmp<setult>, v8i1, V8I8>;
def: OpR_RR_pat<A2_vcmpbgtu,  setugt,         i1,   V8I8>;
def: OpR_RR_pat<A2_vcmpbgtu,  setugt,         v8i1, V8I8>;
def: OpR_RR_pat<A2_vcmpheq,   seteq,          i1,   V4I16>;
def: OpR_RR_pat<A2_vcmpheq,   seteq,          v4i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgt,   RevCmp<setlt>,  i1,   V4I16>;
def: OpR_RR_pat<A2_vcmphgt,   RevCmp<setlt>,  v4i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgt,   setgt,          i1,   V4I16>;
def: OpR_RR_pat<A2_vcmphgt,   setgt,          v4i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgtu,  RevCmp<setult>, i1,   V4I16>;
def: OpR_RR_pat<A2_vcmphgtu,  RevCmp<setult>, v4i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgtu,  setugt,         i1,   V4I16>;
def: OpR_RR_pat<A2_vcmphgtu,  setugt,         v4i1, V4I16>;
def: OpR_RR_pat<A2_vcmpweq,   seteq,          i1,   V2I32>;
def: OpR_RR_pat<A2_vcmpweq,   seteq,          v2i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgt,   RevCmp<setlt>,  i1,   V2I32>;
def: OpR_RR_pat<A2_vcmpwgt,   RevCmp<setlt>,  v2i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgt,   setgt,          i1,   V2I32>;
def: OpR_RR_pat<A2_vcmpwgt,   setgt,          v2i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu,  RevCmp<setult>, i1,   V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu,  RevCmp<setult>, v2i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu,  setugt,         i1,   V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu,  setugt,         v2i1, V2I32>;

let Predicates = [HasV5] in {
  def: OpR_RR_pat<F2_sfcmpeq,   seteq,          i1, F32>;
  def: OpR_RR_pat<F2_sfcmpgt,   setgt,          i1, F32>;
  def: OpR_RR_pat<F2_sfcmpge,   setge,          i1, F32>;
  def: OpR_RR_pat<F2_sfcmpeq,   setoeq,         i1, F32>;
  def: OpR_RR_pat<F2_sfcmpgt,   setogt,         i1, F32>;
  def: OpR_RR_pat<F2_sfcmpge,   setoge,         i1, F32>;
  def: OpR_RR_pat<F2_sfcmpgt,   RevCmp<setolt>, i1, F32>;
  def: OpR_RR_pat<F2_sfcmpge,   RevCmp<setole>, i1, F32>;
  def: OpR_RR_pat<F2_sfcmpgt,   RevCmp<setlt>,  i1, F32>;
  def: OpR_RR_pat<F2_sfcmpge,   RevCmp<setle>,  i1, F32>;
  def: OpR_RR_pat<F2_sfcmpuo,   setuo,          i1, F32>;

  def: OpR_RR_pat<F2_dfcmpeq,   seteq,          i1, F64>;
  def: OpR_RR_pat<F2_dfcmpgt,   setgt,          i1, F64>;
  def: OpR_RR_pat<F2_dfcmpge,   setge,          i1, F64>;
  def: OpR_RR_pat<F2_dfcmpeq,   setoeq,         i1, F64>;
  def: OpR_RR_pat<F2_dfcmpgt,   setogt,         i1, F64>;
  def: OpR_RR_pat<F2_dfcmpge,   setoge,         i1, F64>;
  def: OpR_RR_pat<F2_dfcmpgt,   RevCmp<setolt>, i1, F64>;
  def: OpR_RR_pat<F2_dfcmpge,   RevCmp<setole>, i1, F64>;
  def: OpR_RR_pat<F2_dfcmpgt,   RevCmp<setlt>,  i1, F64>;
  def: OpR_RR_pat<F2_dfcmpge,   RevCmp<setle>,  i1, F64>;
  def: OpR_RR_pat<F2_dfcmpuo,   setuo,          i1, F64>;
}

// Avoid C4_cmpneqi, C4_cmpltei, C4_cmplteui, since they cannot form compounds.

def: Pat<(i1 (setne I32:$Rs, anyimm:$u5)),
         (C2_not (C2_cmpeqi I32:$Rs, imm:$u5))>;
def: Pat<(i1 (setle I32:$Rs, anyimm:$u5)),
         (C2_not (C2_cmpgti I32:$Rs, imm:$u5))>;
def: Pat<(i1 (setule I32:$Rs, anyimm:$u5)),
         (C2_not (C2_cmpgtui I32:$Rs, imm:$u5))>;

class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType,
                  PatFrag RsPred, PatFrag RtPred = RsPred>
  : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
        (Output RsPred:$Rs, RtPred:$Rt)>;

class Outn<InstHexagon MI>
  : OutPatFrag<(ops node:$Rs, node:$Rt),
               (C2_not (MI $Rs, $Rt))>;

def: OpmR_RR_pat<Outn<C2_cmpeq>,    setne,          i1,   I32>;
def: OpmR_RR_pat<Outn<C2_cmpgt>,    setle,          i1,   I32>;
def: OpmR_RR_pat<Outn<C2_cmpgtu>,   setule,         i1,   I32>;
def: OpmR_RR_pat<Outn<C2_cmpgt>,    RevCmp<setge>,  i1,   I32>;
def: OpmR_RR_pat<Outn<C2_cmpgtu>,   RevCmp<setuge>, i1,   I32>;
def: OpmR_RR_pat<Outn<C2_cmpeqp>,   setne,          i1,   I64>;
def: OpmR_RR_pat<Outn<C2_cmpgtp>,   setle,          i1,   I64>;
def: OpmR_RR_pat<Outn<C2_cmpgtup>,  setule,         i1,   I64>;
def: OpmR_RR_pat<Outn<C2_cmpgtp>,   RevCmp<setge>,  i1,   I64>;
def: OpmR_RR_pat<Outn<C2_cmpgtup>,  RevCmp<setuge>, i1,   I64>;
def: OpmR_RR_pat<Outn<A2_vcmpbeq>,  setne,          v8i1, V8I8>;
def: OpmR_RR_pat<Outn<A4_vcmpbgt>,  setle,          v8i1, V8I8>;
def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, setule,         v8i1, V8I8>;
def: OpmR_RR_pat<Outn<A4_vcmpbgt>,  RevCmp<setge>,  v8i1, V8I8>;
def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, RevCmp<setuge>, v8i1, V8I8>;
def: OpmR_RR_pat<Outn<A2_vcmpheq>,  setne,          v4i1, V4I16>;
def: OpmR_RR_pat<Outn<A2_vcmphgt>,  setle,          v4i1, V4I16>;
def: OpmR_RR_pat<Outn<A2_vcmphgtu>, setule,         v4i1, V4I16>;
def: OpmR_RR_pat<Outn<A2_vcmphgt>,  RevCmp<setge>,  v4i1, V4I16>;
def: OpmR_RR_pat<Outn<A2_vcmphgtu>, RevCmp<setuge>, v4i1, V4I16>;
def: OpmR_RR_pat<Outn<A2_vcmpweq>,  setne,          v2i1, V2I32>;
def: OpmR_RR_pat<Outn<A2_vcmpwgt>,  setle,          v2i1, V2I32>;
def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, setule,         v2i1, V2I32>;
def: OpmR_RR_pat<Outn<A2_vcmpwgt>,  RevCmp<setge>,  v2i1, V2I32>;
def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, RevCmp<setuge>, v2i1, V2I32>;

let AddedComplexity = 100 in {
  def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 255), 0)),
           (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), 255), 0)),
           (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>;
  def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 65535), 0)),
           (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), 65535), 0)),
           (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>;
}

// PatFrag for AssertZext, which takes the original type as a parameter.
def SDTAssertZext: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0,1>]>;
def AssertZextSD: SDNode<"ISD::AssertZext", SDTAssertZext>;
class AssertZext<ValueType T>: PatFrag<(ops node:$A), (AssertZextSD $A, T)>;

multiclass Cmpb_pat<InstHexagon MI, PatFrag Op, PatFrag AssertExt,
                      PatLeaf ImmPred, int Mask> {
  def: Pat<(i1 (Op (and I32:$Rs, Mask), ImmPred:$I)),
           (MI I32:$Rs, imm:$I)>;
  def: Pat<(i1 (Op (AssertExt I32:$Rs), ImmPred:$I)),
           (MI I32:$Rs, imm:$I)>;
}

multiclass CmpbN_pat<InstHexagon MI, PatFrag Op, PatFrag AssertExt,
                     PatLeaf ImmPred, int Mask> {
  def: Pat<(i1 (Op (and I32:$Rs, Mask), ImmPred:$I)),
           (C2_not (MI I32:$Rs, imm:$I))>;
  def: Pat<(i1 (Op (AssertExt I32:$Rs), ImmPred:$I)),
           (C2_not (MI I32:$Rs, imm:$I))>;
}

multiclass CmpbND_pat<InstHexagon MI, PatFrag Op, PatFrag AssertExt,
                      PatLeaf ImmPred, int Mask> {
  def: Pat<(i1 (Op (and I32:$Rs, Mask), ImmPred:$I)),
           (C2_not (MI I32:$Rs, (UDEC1 imm:$I)))>;
  def: Pat<(i1 (Op (AssertExt I32:$Rs), ImmPred:$I)),
           (C2_not (MI I32:$Rs, (UDEC1 imm:$I)))>;
}

let AddedComplexity = 200 in {
  defm: Cmpb_pat  <A4_cmpbeqi,  seteq,  AssertZext<i8>,  IsUGT<8,31>,  255>;
  defm: CmpbN_pat <A4_cmpbeqi,  setne,  AssertZext<i8>,  IsUGT<8,31>,  255>;
  defm: Cmpb_pat  <A4_cmpbgtui, setugt, AssertZext<i8>,  IsUGT<32,31>, 255>;
  defm: CmpbN_pat <A4_cmpbgtui, setule, AssertZext<i8>,  IsUGT<32,31>, 255>;
  defm: Cmpb_pat  <A4_cmphgtui, setugt, AssertZext<i16>, IsUGT<32,31>, 65535>;
  defm: CmpbN_pat <A4_cmphgtui, setule, AssertZext<i16>, IsUGT<32,31>, 65535>;
  defm: CmpbND_pat<A4_cmpbgtui, setult, AssertZext<i8>,  IsUGT<32,32>, 255>;
  defm: CmpbND_pat<A4_cmphgtui, setult, AssertZext<i16>, IsUGT<32,32>, 65535>;
}

def: Pat<(i32 (zext (i1 (seteq I32:$Rs, I32:$Rt)))),
         (A4_rcmpeq I32:$Rs, I32:$Rt)>;
def: Pat<(i32 (zext (i1 (setne I32:$Rs, I32:$Rt)))),
         (A4_rcmpneq I32:$Rs, I32:$Rt)>;
def: Pat<(i32 (zext (i1 (seteq I32:$Rs, anyimm:$s8)))),
         (A4_rcmpeqi I32:$Rs, imm:$s8)>;
def: Pat<(i32 (zext (i1 (setne I32:$Rs, anyimm:$s8)))),
         (A4_rcmpneqi I32:$Rs, imm:$s8)>;

def: Pat<(i1 (seteq I1:$Ps, (i1 -1))), (I1:$Ps)>;
def: Pat<(i1 (setne I1:$Ps, (i1 -1))), (C2_not I1:$Ps)>;
def: Pat<(i1 (seteq I1:$Ps, I1:$Pt)),  (C2_xor I1:$Ps, (C2_not I1:$Pt))>;
def: Pat<(i1 (setne I1:$Ps, I1:$Pt)),  (C2_xor I1:$Ps, I1:$Pt)>;

// Floating-point comparisons with checks for ordered/unordered status.

class T3<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3>
  : OutPatFrag<(ops node:$Rs, node:$Rt),
               (MI1 (MI2 $Rs, $Rt), (MI3 $Rs, $Rt))>;

class Cmpuf<InstHexagon MI>:  T3<C2_or,  F2_sfcmpuo, MI>;
class Cmpud<InstHexagon MI>:  T3<C2_or,  F2_dfcmpuo, MI>;

class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>;
class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>;
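
// For illustration: Cmpuf<F2_sfcmpeq>, used for setueq below, expands to
//   (C2_or (F2_sfcmpuo $Rs, $Rt), (F2_sfcmpeq $Rs, $Rt))
// i.e. "unordered or equal", while Cmpufn<F2_sfcmpeq> uses C2_orn to give
// "unordered or not equal" for setune.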

let Predicates = [HasV5] in {
  def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>,  setueq,         i1, F32>;
  def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>,  setuge,         i1, F32>;
  def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>,  setugt,         i1, F32>;
  def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>,  RevCmp<setule>, i1, F32>;
  def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>,  RevCmp<setult>, i1, F32>;
  def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune,         i1, F32>;

  def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>,  setueq,         i1, F64>;
  def: OpmR_RR_pat<Cmpud<F2_dfcmpge>,  setuge,         i1, F64>;
  def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>,  setugt,         i1, F64>;
  def: OpmR_RR_pat<Cmpud<F2_dfcmpge>,  RevCmp<setule>, i1, F64>;
  def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>,  RevCmp<setult>, i1, F64>;
  def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune,         i1, F64>;
}

let Predicates = [HasV5] in {
  def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
  def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne,  i1, F32>;

  def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
  def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne,  i1, F64>;

  def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto,   i1, F32>;
  def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto,   i1, F64>;
}


// --(6) Select ----------------------------------------------------------
//

def: Pat<(select I1:$Pu, I32:$Rs, I32:$Rt),
         (C2_mux I1:$Pu, I32:$Rs, I32:$Rt)>;
def: Pat<(select I1:$Pu, anyimm:$s8, I32:$Rs),
         (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>;
def: Pat<(select I1:$Pu, I32:$Rs, anyimm:$s8),
         (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>;
def: Pat<(select I1:$Pu, anyimm:$s8, s8_0ImmPred:$S8),
         (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>;

def: Pat<(select (not I1:$Pu), I32:$Rs, I32:$Rt),
         (C2_mux I1:$Pu, I32:$Rt, I32:$Rs)>;
def: Pat<(select (not I1:$Pu), s8_0ImmPred:$S8, anyimm:$s8),
         (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>;
def: Pat<(select (not I1:$Pu), anyimm:$s8, I32:$Rs),
         (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>;
def: Pat<(select (not I1:$Pu), I32:$Rs, anyimm:$s8),
         (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>;

// Map from a 64-bit select to an emulated 64-bit mux.
// Hexagon does not support 64-bit MUXes, so emulate with combines.
def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt),
         (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
                   (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;

let Predicates = [HasV5] in {
  def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
           (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
  def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
           (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
  def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
           (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
  def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt),
           (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
                     (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;

  def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt),
           (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>;
  def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt),
           (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>;

  def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
           (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
  def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
           (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
}

def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt),
         (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
def: Pat<(select I1:$Pu, V2I16:$Rs, V2I16:$Rt),
         (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
def: Pat<(select I1:$Pu, V2I32:$Rs, V2I32:$Rt),
         (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
                   (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;

def: Pat<(vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt),
         (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt),
         (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
def: Pat<(vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt),
         (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;

// From LegalizeDAG.cpp: (Pu ? Pv : Pw) <=> (Pu & Pv) | (!Pu & Pw).
def: Pat<(select I1:$Pu, I1:$Pv, I1:$Pw),
         (C2_or (C2_and  I1:$Pu, I1:$Pv),
                (C2_andn I1:$Pw, I1:$Pu))>;


def IsPosHalf : PatLeaf<(i32 IntRegs:$a), [{
  return isPositiveHalfWord(N);
}]>;

multiclass SelMinMax16_pats<PatFrag CmpOp, InstHexagon InstA,
                            InstHexagon InstB> {
  def: Pat<(sext_inreg (select (i1 (CmpOp IsPosHalf:$Rs, IsPosHalf:$Rt)),
                               IsPosHalf:$Rs, IsPosHalf:$Rt), i16),
           (InstA IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sext_inreg (select (i1 (CmpOp IsPosHalf:$Rs, IsPosHalf:$Rt)),
                               IsPosHalf:$Rt, IsPosHalf:$Rs), i16),
           (InstB IntRegs:$Rs, IntRegs:$Rt)>;
}

let AddedComplexity = 200 in {
  defm: SelMinMax16_pats<setge,  A2_max,  A2_min>;
  defm: SelMinMax16_pats<setgt,  A2_max,  A2_min>;
  defm: SelMinMax16_pats<setle,  A2_min,  A2_max>;
  defm: SelMinMax16_pats<setlt,  A2_min,  A2_max>;
  defm: SelMinMax16_pats<setuge, A2_maxu, A2_minu>;
  defm: SelMinMax16_pats<setugt, A2_maxu, A2_minu>;
  defm: SelMinMax16_pats<setule, A2_minu, A2_maxu>;
  defm: SelMinMax16_pats<setult, A2_minu, A2_maxu>;
}

let AddedComplexity = 200 in {
  defm: SelMinMax_pats<setge,  I32, A2_max,   A2_min>;
  defm: SelMinMax_pats<setgt,  I32, A2_max,   A2_min>;
  defm: SelMinMax_pats<setle,  I32, A2_min,   A2_max>;
  defm: SelMinMax_pats<setlt,  I32, A2_min,   A2_max>;
  defm: SelMinMax_pats<setuge, I32, A2_maxu,  A2_minu>;
  defm: SelMinMax_pats<setugt, I32, A2_maxu,  A2_minu>;
  defm: SelMinMax_pats<setule, I32, A2_minu,  A2_maxu>;
  defm: SelMinMax_pats<setult, I32, A2_minu,  A2_maxu>;

  defm: SelMinMax_pats<setge,  I64, A2_maxp,  A2_minp>;
  defm: SelMinMax_pats<setgt,  I64, A2_maxp,  A2_minp>;
  defm: SelMinMax_pats<setle,  I64, A2_minp,  A2_maxp>;
  defm: SelMinMax_pats<setlt,  I64, A2_minp,  A2_maxp>;
  defm: SelMinMax_pats<setuge, I64, A2_maxup, A2_minup>;
  defm: SelMinMax_pats<setugt, I64, A2_maxup, A2_minup>;
  defm: SelMinMax_pats<setule, I64, A2_minup, A2_maxup>;
  defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>;
}

let AddedComplexity = 100, Predicates = [HasV5] in {
  defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>;
  defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>;
  defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>;
  defm: SelMinMax_pats<setoge, F32, F2_sfmax, F2_sfmin>;
}


// --(7) Insert/extract --------------------------------------------------
//

def SDTHexagonINSERT:
  SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                       SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
def HexagonINSERT:    SDNode<"HexagonISD::INSERT",   SDTHexagonINSERT>;

let AddedComplexity = 10 in {
  def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2),
           (S2_insert I32:$Rs, I32:$Rt, imm:$u1, imm:$u2)>;
  def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2),
           (S2_insertp I64:$Rs, I64:$Rt, imm:$u1, imm:$u2)>;
}
def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, I32:$Width, I32:$Off),
         (S2_insert_rp I32:$Rs, I32:$Rt, (Combinew $Width, $Off))>;
def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, I32:$Width, I32:$Off),
         (S2_insertp_rp I64:$Rs, I64:$Rt, (Combinew $Width, $Off))>;

def SDTHexagonEXTRACTU
  : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
                  SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
def HexagonEXTRACTU:   SDNode<"HexagonISD::EXTRACTU",   SDTHexagonEXTRACTU>;

let AddedComplexity = 10 in {
  def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u5, u5_0ImmPred:$U5),
           (S2_extractu I32:$Rs, imm:$u5, imm:$U5)>;
  def: Pat<(HexagonEXTRACTU I64:$Rs, u6_0ImmPred:$u6, u6_0ImmPred:$U6),
           (S2_extractup I64:$Rs, imm:$u6, imm:$U6)>;
}
def: Pat<(HexagonEXTRACTU I32:$Rs, I32:$Width, I32:$Off),
         (S2_extractu_rp I32:$Rs, (Combinew $Width, $Off))>;
def: Pat<(HexagonEXTRACTU I64:$Rs, I32:$Width, I32:$Off),
         (S2_extractup_rp I64:$Rs, (Combinew $Width, $Off))>;

def SDTHexagonVSPLAT:
  SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;

def HexagonVSPLAT: SDNode<"HexagonISD::VSPLAT", SDTHexagonVSPLAT>;

def: Pat<(v4i8  (HexagonVSPLAT I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
def: Pat<(v4i16 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)),
         (A2_combineii imm:$s8, imm:$s8)>;
def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>;

let AddedComplexity = 10 in
def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), (S6_vsplatrbp I32:$Rs)>,
     Requires<[HasV62]>;
def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)),
         (Combinew (S2_vsplatrb I32:$Rs), (S2_vsplatrb I32:$Rs))>;


// --(8) Shift/permute ---------------------------------------------------
//

def SDTHexagonI64I32I32: SDTypeProfile<1, 2,
  [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;

def HexagonCOMBINE:  SDNode<"HexagonISD::COMBINE",  SDTHexagonI64I32I32>;

def: Pat<(HexagonCOMBINE I32:$Rs, I32:$Rt), (Combinew $Rs, $Rt)>;

// The complexity of the combines involving immediates should be greater
// than the complexity of the combine with two registers.
let AddedComplexity = 50 in {
  def: Pat<(HexagonCOMBINE I32:$Rs, anyimm:$s8),
           (A4_combineri IntRegs:$Rs, imm:$s8)>;
  def: Pat<(HexagonCOMBINE anyimm:$s8, I32:$Rs),
           (A4_combineir imm:$s8, IntRegs:$Rs)>;
}

// The complexity of the combine with two immediates should be greater than
// the complexity of a combine involving a register.
let AddedComplexity = 75 in {
  def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, anyimm:$u6),
           (A4_combineii imm:$s8, imm:$u6)>;
  def: Pat<(HexagonCOMBINE anyimm:$s8, s8_0ImmPred:$S8),
           (A2_combineii imm:$s8, imm:$S8)>;
}

def: Pat<(bswap I32:$Rs),  (A2_swiz I32:$Rs)>;
def: Pat<(bswap I64:$Rss), (Combinew (A2_swiz (LoReg $Rss)),
                                     (A2_swiz (HiReg $Rss)))>;

def: Pat<(shl s6_0ImmPred:$s6, I32:$Rt),  (S4_lsli imm:$s6, I32:$Rt)>;
def: Pat<(shl I32:$Rs, (i32 16)),         (A2_aslh I32:$Rs)>;
def: Pat<(sra I32:$Rs, (i32 16)),         (A2_asrh I32:$Rs)>;

def: OpR_RI_pat<S2_asr_i_r,  Sra, i32,   I32,   u5_0ImmPred>;
def: OpR_RI_pat<S2_lsr_i_r,  Srl, i32,   I32,   u5_0ImmPred>;
def: OpR_RI_pat<S2_asl_i_r,  Shl, i32,   I32,   u5_0ImmPred>;
def: OpR_RI_pat<S2_asr_i_p,  Sra, i64,   I64,   u6_0ImmPred>;
def: OpR_RI_pat<S2_lsr_i_p,  Srl, i64,   I64,   u6_0ImmPred>;
def: OpR_RI_pat<S2_asl_i_p,  Shl, i64,   I64,   u6_0ImmPred>;
def: OpR_RI_pat<S2_asr_i_vh, Sra, v4i16, V4I16, u4_0ImmPred>;
def: OpR_RI_pat<S2_lsr_i_vh, Srl, v4i16, V4I16, u4_0ImmPred>;
def: OpR_RI_pat<S2_asl_i_vh, Shl, v4i16, V4I16, u4_0ImmPred>;
def: OpR_RI_pat<S2_asr_i_vw, Sra, v2i32, V2I32, u5_0ImmPred>;
def: OpR_RI_pat<S2_lsr_i_vw, Srl, v2i32, V2I32, u5_0ImmPred>;
def: OpR_RI_pat<S2_asl_i_vw, Shl, v2i32, V2I32, u5_0ImmPred>;

def: OpR_RR_pat<S2_asr_r_r, Sra, i32, I32, I32>;
def: OpR_RR_pat<S2_lsr_r_r, Srl, i32, I32, I32>;
def: OpR_RR_pat<S2_asl_r_r, Shl, i32, I32, I32>;
def: OpR_RR_pat<S2_asr_r_p, Sra, i64, I64, I32>;
def: OpR_RR_pat<S2_lsr_r_p, Srl, i64, I64, I32>;
def: OpR_RR_pat<S2_asl_r_p, Shl, i64, I64, I32>;

let Predicates = [HasV60] in {
  def: OpR_RI_pat<S6_rol_i_r, Rol, i32, I32, u5_0ImmPred>;
  def: OpR_RI_pat<S6_rol_i_p, Rol, i64, I64, u6_0ImmPred>;
}

def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)),
         (S2_asr_i_r_rnd I32:$Rs, imm:$u5)>;
def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)),
         (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5]>;

// Prefer S2_addasl_rrri over S2_asl_i_r_acc.
let AddedComplexity = 120 in
def: Pat<(add I32:$Rt, (shl I32:$Rs, u3_0ImmPred:$u3)),
         (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>;

let AddedComplexity = 100 in {
  def: AccRRI_pat<S2_asr_i_r_acc,   Add, Su<Sra>, I32, u5_0ImmPred>;
  def: AccRRI_pat<S2_asr_i_r_nac,   Sub, Su<Sra>, I32, u5_0ImmPred>;
  def: AccRRI_pat<S2_asr_i_r_and,   And, Su<Sra>, I32, u5_0ImmPred>;
  def: AccRRI_pat<S2_asr_i_r_or,    Or,  Su<Sra>, I32, u5_0ImmPred>;

  def: AccRRI_pat<S2_asr_i_p_acc,   Add, Su<Sra>, I64, u6_0ImmPred>;
  def: AccRRI_pat<S2_asr_i_p_nac,   Sub, Su<Sra>, I64, u6_0ImmPred>;
  def: AccRRI_pat<S2_asr_i_p_and,   And, Su<Sra>, I64, u6_0ImmPred>;
  def: AccRRI_pat<S2_asr_i_p_or,    Or,  Su<Sra>, I64, u6_0ImmPred>;

  def: AccRRI_pat<S2_lsr_i_r_acc,   Add, Su<Srl>, I32, u5_0ImmPred>;
  def: AccRRI_pat<S2_lsr_i_r_nac,   Sub, Su<Srl>, I32, u5_0ImmPred>;
  def: AccRRI_pat<S2_lsr_i_r_and,   And, Su<Srl>, I32, u5_0ImmPred>;
  def: AccRRI_pat<S2_lsr_i_r_or,    Or,  Su<Srl>, I32, u5_0ImmPred>;
  def: AccRRI_pat<S2_lsr_i_r_xacc,  Xor, Su<Srl>, I32, u5_0ImmPred>;

  def: AccRRI_pat<S2_lsr_i_p_acc,   Add, Su<Srl>, I64, u6_0ImmPred>;
  def: AccRRI_pat<S2_lsr_i_p_nac,   Sub, Su<Srl>, I64, u6_0ImmPred>;
  def: AccRRI_pat<S2_lsr_i_p_and,   And, Su<Srl>, I64, u6_0ImmPred>;
  def: AccRRI_pat<S2_lsr_i_p_or,    Or,  Su<Srl>, I64, u6_0ImmPred>;
  def: AccRRI_pat<S2_lsr_i_p_xacc,  Xor, Su<Srl>, I64, u6_0ImmPred>;

  def: AccRRI_pat<S2_asl_i_r_acc,   Add, Su<Shl>, I32, u5_0ImmPred>;
  def: AccRRI_pat<S2_asl_i_r_nac,   Sub, Su<Shl>, I32, u5_0ImmPred>;
  def: AccRRI_pat<S2_asl_i_r_and,   And, Su<Shl>, I32, u5_0ImmPred>;
  def: AccRRI_pat<S2_asl_i_r_or,    Or,  Su<Shl>, I32, u5_0ImmPred>;
  def: AccRRI_pat<S2_asl_i_r_xacc,  Xor, Su<Shl>, I32, u5_0ImmPred>;

  def: AccRRI_pat<S2_asl_i_p_acc,   Add, Su<Shl>, I64, u6_0ImmPred>;
  def: AccRRI_pat<S2_asl_i_p_nac,   Sub, Su<Shl>, I64, u6_0ImmPred>;
  def: AccRRI_pat<S2_asl_i_p_and,   And, Su<Shl>, I64, u6_0ImmPred>;
  def: AccRRI_pat<S2_asl_i_p_or,    Or,  Su<Shl>, I64, u6_0ImmPred>;
  def: AccRRI_pat<S2_asl_i_p_xacc,  Xor, Su<Shl>, I64, u6_0ImmPred>;

  let Predicates = [HasV60] in {
    def: AccRRI_pat<S6_rol_i_r_acc,   Add, Su<Rol>, I32, u5_0ImmPred>;
    def: AccRRI_pat<S6_rol_i_r_nac,   Sub, Su<Rol>, I32, u5_0ImmPred>;
    def: AccRRI_pat<S6_rol_i_r_and,   And, Su<Rol>, I32, u5_0ImmPred>;
    def: AccRRI_pat<S6_rol_i_r_or,    Or,  Su<Rol>, I32, u5_0ImmPred>;
    def: AccRRI_pat<S6_rol_i_r_xacc,  Xor, Su<Rol>, I32, u5_0ImmPred>;

    def: AccRRI_pat<S6_rol_i_p_acc,   Add, Su<Rol>, I64, u6_0ImmPred>;
    def: AccRRI_pat<S6_rol_i_p_nac,   Sub, Su<Rol>, I64, u6_0ImmPred>;
    def: AccRRI_pat<S6_rol_i_p_and,   And, Su<Rol>, I64, u6_0ImmPred>;
    def: AccRRI_pat<S6_rol_i_p_or,    Or,  Su<Rol>, I64, u6_0ImmPred>;
    def: AccRRI_pat<S6_rol_i_p_xacc,  Xor, Su<Rol>, I64, u6_0ImmPred>;
  }
}

let AddedComplexity = 100 in {
  def: AccRRR_pat<S2_asr_r_r_acc,   Add, Su<Sra>, I32, I32, I32>;
  def: AccRRR_pat<S2_asr_r_r_nac,   Sub, Su<Sra>, I32, I32, I32>;
  def: AccRRR_pat<S2_asr_r_r_and,   And, Su<Sra>, I32, I32, I32>;
  def: AccRRR_pat<S2_asr_r_r_or,    Or,  Su<Sra>, I32, I32, I32>;

  def: AccRRR_pat<S2_asr_r_p_acc,   Add, Su<Sra>, I64, I64, I32>;
  def: AccRRR_pat<S2_asr_r_p_nac,   Sub, Su<Sra>, I64, I64, I32>;
  def: AccRRR_pat<S2_asr_r_p_and,   And, Su<Sra>, I64, I64, I32>;
  def: AccRRR_pat<S2_asr_r_p_or,    Or,  Su<Sra>, I64, I64, I32>;
  def: AccRRR_pat<S2_asr_r_p_xor,   Xor, Su<Sra>, I64, I64, I32>;

  def: AccRRR_pat<S2_lsr_r_r_acc,   Add, Su<Srl>, I32, I32, I32>;
  def: AccRRR_pat<S2_lsr_r_r_nac,   Sub, Su<Srl>, I32, I32, I32>;
  def: AccRRR_pat<S2_lsr_r_r_and,   And, Su<Srl>, I32, I32, I32>;
  def: AccRRR_pat<S2_lsr_r_r_or,    Or,  Su<Srl>, I32, I32, I32>;

  def: AccRRR_pat<S2_lsr_r_p_acc,   Add, Su<Srl>, I64, I64, I32>;
  def: AccRRR_pat<S2_lsr_r_p_nac,   Sub, Su<Srl>, I64, I64, I32>;
  def: AccRRR_pat<S2_lsr_r_p_and,   And, Su<Srl>, I64, I64, I32>;
  def: AccRRR_pat<S2_lsr_r_p_or,    Or,  Su<Srl>, I64, I64, I32>;
  def: AccRRR_pat<S2_lsr_r_p_xor,   Xor, Su<Srl>, I64, I64, I32>;

  def: AccRRR_pat<S2_asl_r_r_acc,   Add, Su<Shl>, I32, I32, I32>;
  def: AccRRR_pat<S2_asl_r_r_nac,   Sub, Su<Shl>, I32, I32, I32>;
  def: AccRRR_pat<S2_asl_r_r_and,   And, Su<Shl>, I32, I32, I32>;
  def: AccRRR_pat<S2_asl_r_r_or,    Or,  Su<Shl>, I32, I32, I32>;

  def: AccRRR_pat<S2_asl_r_p_acc,   Add, Su<Shl>, I64, I64, I32>;
  def: AccRRR_pat<S2_asl_r_p_nac,   Sub, Su<Shl>, I64, I64, I32>;
  def: AccRRR_pat<S2_asl_r_p_and,   And, Su<Shl>, I64, I64, I32>;
  def: AccRRR_pat<S2_asl_r_p_or,    Or,  Su<Shl>, I64, I64, I32>;
  def: AccRRR_pat<S2_asl_r_p_xor,   Xor, Su<Shl>, I64, I64, I32>;
}


   1093 class OpshIRI_pat<InstHexagon MI, PatFrag Op, PatFrag ShOp,
   1094                   PatFrag RegPred, PatFrag ImmPred>
   1095   : Pat<(Op anyimm:$u8, (ShOp RegPred:$Rs, ImmPred:$U5)),
   1096         (MI anyimm:$u8, RegPred:$Rs, imm:$U5)>;
   1097 
   1098 let AddedComplexity = 200 in {
   1099   def: OpshIRI_pat<S4_addi_asl_ri,  Add, Su<Shl>, I32, u5_0ImmPred>;
   1100   def: OpshIRI_pat<S4_addi_lsr_ri,  Add, Su<Srl>, I32, u5_0ImmPred>;
   1101   def: OpshIRI_pat<S4_subi_asl_ri,  Sub, Su<Shl>, I32, u5_0ImmPred>;
   1102   def: OpshIRI_pat<S4_subi_lsr_ri,  Sub, Su<Srl>, I32, u5_0ImmPred>;
   1103   def: OpshIRI_pat<S4_andi_asl_ri,  And, Su<Shl>, I32, u5_0ImmPred>;
   1104   def: OpshIRI_pat<S4_andi_lsr_ri,  And, Su<Srl>, I32, u5_0ImmPred>;
   1105   def: OpshIRI_pat<S4_ori_asl_ri,   Or,  Su<Shl>, I32, u5_0ImmPred>;
   1106   def: OpshIRI_pat<S4_ori_lsr_ri,   Or,  Su<Srl>, I32, u5_0ImmPred>;
   1107 }
   1108 
   1109 // Prefer this pattern to S2_asl_i_p_or for the special case of joining
   1110 // two 32-bit words into a 64-bit word.
   1111 let AddedComplexity = 200 in
   1112 def: Pat<(or (shl (Aext64 I32:$a), (i32 32)), (Zext64 I32:$b)),
   1113          (Combinew I32:$a, I32:$b)>;
   1114 
   1115 def: Pat<(or (or (or (shl (Zext64 (and I32:$b, (i32 65535))), (i32 16)),
   1116                      (Zext64 (and I32:$a, (i32 65535)))),
   1117                  (shl (Aext64 (and I32:$c, (i32 65535))), (i32 32))),
   1118              (shl (Aext64 I32:$d), (i32 48))),
   1119          (Combinew (A2_combine_ll I32:$d, I32:$c),
   1120                    (A2_combine_ll I32:$b, I32:$a))>;
   1121 
   1122 def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add I32:$b, 3))),
   1123                                (i32 8)),
   1124                           (i32 (zextloadi8 (add I32:$b, 2)))),
   1125                       (i32 16)),
   1126                  (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
   1127              (zextloadi8 I32:$b)),
   1128          (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
   1129 
   1130 let AddedComplexity = 200 in {
   1131   def: Pat<(or (shl I32:$Rt, (i32 16)), (and I32:$Rs, (i32 65535))),
   1132            (A2_combine_ll I32:$Rt, I32:$Rs)>;
   1133   def: Pat<(or (shl I32:$Rt, (i32 16)), (srl I32:$Rs, (i32 16))),
   1134            (A2_combine_lh I32:$Rt, I32:$Rs)>;
   1135   def: Pat<(or (and I32:$Rt, (i32 -65536)), (and I32:$Rs, (i32 65535))),
   1136            (A2_combine_hl I32:$Rt, I32:$Rs)>;
   1137   def: Pat<(or (and I32:$Rt, (i32 -65536)), (srl I32:$Rs, (i32 16))),
   1138            (A2_combine_hh I32:$Rt, I32:$Rs)>;
   1139 }
   1140 
   1141 def SDTHexagonVShift
   1142   : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVec<0>, SDTCisVT<2, i32>]>;
   1143 
   1144 def HexagonVASL: SDNode<"HexagonISD::VASL", SDTHexagonVShift>;
   1145 def HexagonVASR: SDNode<"HexagonISD::VASR", SDTHexagonVShift>;
   1146 def HexagonVLSR: SDNode<"HexagonISD::VLSR", SDTHexagonVShift>;
   1147 
   1148 def: OpR_RI_pat<S2_asl_i_vw, pf2<HexagonVASL>, v2i32, V2I32, u5_0ImmPred>;
   1149 def: OpR_RI_pat<S2_asl_i_vh, pf2<HexagonVASL>, v4i16, V4I16, u4_0ImmPred>;
   1150 def: OpR_RI_pat<S2_asr_i_vw, pf2<HexagonVASR>, v2i32, V2I32, u5_0ImmPred>;
   1151 def: OpR_RI_pat<S2_asr_i_vh, pf2<HexagonVASR>, v4i16, V4I16, u4_0ImmPred>;
   1152 def: OpR_RI_pat<S2_lsr_i_vw, pf2<HexagonVLSR>, v2i32, V2I32, u5_0ImmPred>;
   1153 def: OpR_RI_pat<S2_lsr_i_vh, pf2<HexagonVLSR>, v4i16, V4I16, u4_0ImmPred>;
   1154 
   1155 def: OpR_RR_pat<S2_asl_r_vw, pf2<HexagonVASL>, v2i32, V2I32, I32>;
   1156 def: OpR_RR_pat<S2_asl_r_vh, pf2<HexagonVASL>, v4i16, V4I16, I32>;
   1157 def: OpR_RR_pat<S2_asr_r_vw, pf2<HexagonVASR>, v2i32, V2I32, I32>;
   1158 def: OpR_RR_pat<S2_asr_r_vh, pf2<HexagonVASR>, v4i16, V4I16, I32>;
   1159 def: OpR_RR_pat<S2_lsr_r_vw, pf2<HexagonVLSR>, v2i32, V2I32, I32>;
   1160 def: OpR_RR_pat<S2_lsr_r_vh, pf2<HexagonVLSR>, v4i16, V4I16, I32>;
   1161 
   1162 def: Pat<(sra V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c))),
   1163          (S2_asr_i_vw V2I32:$b, imm:$c)>;
   1164 def: Pat<(srl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c))),
   1165          (S2_lsr_i_vw V2I32:$b, imm:$c)>;
   1166 def: Pat<(shl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c))),
   1167          (S2_asl_i_vw V2I32:$b, imm:$c)>;
   1168 def: Pat<(sra V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
   1169          (S2_asr_i_vh V4I16:$b, imm:$c)>;
   1170 def: Pat<(srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
   1171          (S2_lsr_i_vh V4I16:$b, imm:$c)>;
   1172 def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
   1173          (S2_asl_i_vh V4I16:$b, imm:$c)>;
   1174 
   1175 
   1176 // --(9) Arithmetic/bitwise ----------------------------------------------
   1177 //
   1178 
   1179 def: Pat<(abs  I32:$Rs), (A2_abs   I32:$Rs)>;
   1180 def: Pat<(abs  I64:$Rs), (A2_absp  I64:$Rs)>;
   1181 def: Pat<(not  I32:$Rs), (A2_subri -1, I32:$Rs)>;
   1182 def: Pat<(not  I64:$Rs), (A2_notp  I64:$Rs)>;
   1183 def: Pat<(ineg I64:$Rs), (A2_negp  I64:$Rs)>;
   1184 
   1185 let Predicates = [HasV5] in {
   1186   def: Pat<(fabs F32:$Rs), (S2_clrbit_i    F32:$Rs, 31)>;
   1187   def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
   1188 
   1189   def: Pat<(fabs F64:$Rs),
   1190            (Combinew (S2_clrbit_i (HiReg $Rs), 31),
   1191                      (i32 (LoReg $Rs)))>;
   1192   def: Pat<(fneg F64:$Rs),
   1193            (Combinew (S2_togglebit_i (HiReg $Rs), 31),
   1194                      (i32 (LoReg $Rs)))>;
   1195 }
   1196 
   1197 def: Pat<(add I32:$Rs, anyimm:$s16),   (A2_addi   I32:$Rs,  imm:$s16)>;
   1198 def: Pat<(or  I32:$Rs, anyimm:$s10),   (A2_orir   I32:$Rs,  imm:$s10)>;
   1199 def: Pat<(and I32:$Rs, anyimm:$s10),   (A2_andir  I32:$Rs,  imm:$s10)>;
   1200 def: Pat<(sub anyimm:$s10, I32:$Rs),   (A2_subri  imm:$s10, I32:$Rs)>;
   1201 
   1202 def: OpR_RR_pat<A2_add,       Add,        i32,   I32>;
   1203 def: OpR_RR_pat<A2_sub,       Sub,        i32,   I32>;
   1204 def: OpR_RR_pat<A2_and,       And,        i32,   I32>;
   1205 def: OpR_RR_pat<A2_or,        Or,         i32,   I32>;
   1206 def: OpR_RR_pat<A2_xor,       Xor,        i32,   I32>;
   1207 def: OpR_RR_pat<A2_addp,      Add,        i64,   I64>;
   1208 def: OpR_RR_pat<A2_subp,      Sub,        i64,   I64>;
   1209 def: OpR_RR_pat<A2_andp,      And,        i64,   I64>;
   1210 def: OpR_RR_pat<A2_orp,       Or,         i64,   I64>;
   1211 def: OpR_RR_pat<A2_xorp,      Xor,        i64,   I64>;
   1212 def: OpR_RR_pat<A4_andnp,     Not2<And>,  i64,   I64>;
   1213 def: OpR_RR_pat<A4_ornp,      Not2<Or>,   i64,   I64>;
   1214 
   1215 def: OpR_RR_pat<A2_svaddh,    Add,        v2i16, V2I16>;
   1216 def: OpR_RR_pat<A2_svsubh,    Sub,        v2i16, V2I16>;
   1217 
   1218 def: OpR_RR_pat<A2_vaddub,    Add,        v8i8,  V8I8>;
   1219 def: OpR_RR_pat<A2_vaddh,     Add,        v4i16, V4I16>;
   1220 def: OpR_RR_pat<A2_vaddw,     Add,        v2i32, V2I32>;
   1221 def: OpR_RR_pat<A2_vsubub,    Sub,        v8i8,  V8I8>;
   1222 def: OpR_RR_pat<A2_vsubh,     Sub,        v4i16, V4I16>;
   1223 def: OpR_RR_pat<A2_vsubw,     Sub,        v2i32, V2I32>;
   1224 
   1225 def: OpR_RR_pat<A2_and,       And,        v4i8,  V4I8>;
   1226 def: OpR_RR_pat<A2_xor,       Xor,        v4i8,  V4I8>;
   1227 def: OpR_RR_pat<A2_or,        Or,         v4i8,  V4I8>;
   1228 def: OpR_RR_pat<A2_and,       And,        v2i16, V2I16>;
   1229 def: OpR_RR_pat<A2_xor,       Xor,        v2i16, V2I16>;
   1230 def: OpR_RR_pat<A2_or,        Or,         v2i16, V2I16>;
   1231 def: OpR_RR_pat<A2_andp,      And,        v8i8,  V8I8>;
   1232 def: OpR_RR_pat<A2_orp,       Or,         v8i8,  V8I8>;
   1233 def: OpR_RR_pat<A2_xorp,      Xor,        v8i8,  V8I8>;
   1234 def: OpR_RR_pat<A2_andp,      And,        v4i16, V4I16>;
   1235 def: OpR_RR_pat<A2_orp,       Or,         v4i16, V4I16>;
   1236 def: OpR_RR_pat<A2_xorp,      Xor,        v4i16, V4I16>;
   1237 def: OpR_RR_pat<A2_andp,      And,        v2i32, V2I32>;
   1238 def: OpR_RR_pat<A2_orp,       Or,         v2i32, V2I32>;
   1239 def: OpR_RR_pat<A2_xorp,      Xor,        v2i32, V2I32>;
   1240 
   1241 def: OpR_RR_pat<M2_mpyi,      Mul,        i32,   I32>;
   1242 def: OpR_RR_pat<M2_mpy_up,    pf2<mulhs>, i32,   I32>;
   1243 def: OpR_RR_pat<M2_mpyu_up,   pf2<mulhu>, i32,   I32>;
   1244 def: OpR_RI_pat<M2_mpysip,    Mul,        i32,   I32, u32_0ImmPred>;
   1245 def: OpR_RI_pat<M2_mpysmi,    Mul,        i32,   I32, s32_0ImmPred>;
   1246 
   1247 // Arithmetic on predicates.
   1248 def: OpR_RR_pat<C2_xor,       Add,        i1,    I1>;
   1249 def: OpR_RR_pat<C2_xor,       Add,        v2i1,  V2I1>;
   1250 def: OpR_RR_pat<C2_xor,       Add,        v4i1,  V4I1>;
   1251 def: OpR_RR_pat<C2_xor,       Add,        v8i1,  V8I1>;
   1252 def: OpR_RR_pat<C2_xor,       Sub,        i1,    I1>;
   1253 def: OpR_RR_pat<C2_xor,       Sub,        v2i1,  V2I1>;
   1254 def: OpR_RR_pat<C2_xor,       Sub,        v4i1,  V4I1>;
   1255 def: OpR_RR_pat<C2_xor,       Sub,        v8i1,  V8I1>;
   1256 def: OpR_RR_pat<C2_and,       Mul,        i1,    I1>;
   1257 def: OpR_RR_pat<C2_and,       Mul,        v2i1,  V2I1>;
   1258 def: OpR_RR_pat<C2_and,       Mul,        v4i1,  V4I1>;
   1259 def: OpR_RR_pat<C2_and,       Mul,        v8i1,  V8I1>;
   1260 
   1261 let Predicates = [HasV5] in {
   1262   def: OpR_RR_pat<F2_sfadd,     pf2<fadd>,    f32, F32>;
   1263   def: OpR_RR_pat<F2_sfsub,     pf2<fsub>,    f32, F32>;
   1264   def: OpR_RR_pat<F2_sfmpy,     pf2<fmul>,    f32, F32>;
   1265   def: OpR_RR_pat<F2_sfmin,     pf2<fminnum>, f32, F32>;
   1266   def: OpR_RR_pat<F2_sfmax,     pf2<fmaxnum>, f32, F32>;
   1267 }
   1268 
   1269 // In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add
   1270 // over add-add with individual multiplies as inputs.
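        // A sketch of such an expression in C (loop and names illustrative only):
        //   int acc = 0;
        //   for (int i = 0; i < n; ++i)
        //     acc += a[i] * b[i];     // each step maps to M2_maci: Rx += Rs*Rt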
   1271 let AddedComplexity = 10 in {
   1272   def: AccRRI_pat<M2_macsip,    Add, Su<Mul>, I32, u32_0ImmPred>;
   1273   def: AccRRI_pat<M2_macsin,    Sub, Su<Mul>, I32, u32_0ImmPred>;
   1274   def: AccRRR_pat<M2_maci,      Add, Su<Mul>, I32, I32, I32>;
   1275 }
   1276 
   1277 def: AccRRI_pat<M2_naccii,    Sub, Su<Add>, I32, s32_0ImmPred>;
   1278 def: AccRRI_pat<M2_accii,     Add, Su<Add>, I32, s32_0ImmPred>;
   1279 def: AccRRR_pat<M2_acci,      Add, Su<Add>, I32, I32, I32>;
   1280 
   1281 // Mulh for vectors
   1282 //
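        // Per lane, mulhu/mulhs produce the high half of the widened product, e.g.
        // for one unsigned v2i32 lane (an illustrative C sketch):
        //   r[i] = (uint32_t)(((uint64_t)a[i] * (uint64_t)b[i]) >> 32);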
   1283 def: Pat<(v2i32 (mulhu V2I32:$Rss, V2I32:$Rtt)),
   1284          (Combinew (M2_mpyu_up (HiReg $Rss), (HiReg $Rtt)),
   1285                    (M2_mpyu_up (LoReg $Rss), (LoReg $Rtt)))>;
   1286 
   1287 def: Pat<(v2i32 (mulhs V2I32:$Rs, V2I32:$Rt)),
   1288          (Combinew (M2_mpy_up (HiReg $Rs), (HiReg $Rt)),
   1289          (M2_mpy_up (LoReg $Rs), (LoReg $Rt)))>;
   1290 
   1291 def Mulhub:
   1292   OutPatFrag<(ops node:$Rss, node:$Rtt),
   1293              (Combinew (S2_vtrunohb (M5_vmpybuu (HiReg $Rss), (HiReg $Rtt))),
   1294                        (S2_vtrunohb (M5_vmpybuu (LoReg $Rss), (LoReg $Rtt))))>;
   1295 
   1296 // Equivalent of byte-wise arithmetic shift right by 7 in v8i8.
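        // vcmpb.gt(Rss,#-1) sets a predicate bit for every byte that is >= 0;
        // negating the predicate marks the negative bytes, and C2_mask expands each
        // bit to 0x00/0xFF, i.e. the sign of each byte replicated into all 8 bits.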
   1297 def Asr7:
   1298   OutPatFrag<(ops node:$Rss), (C2_mask (C2_not (A4_vcmpbgti $Rss, -1)))>;
   1299 
   1300 def: Pat<(v8i8 (mulhu V8I8:$Rss, V8I8:$Rtt)),
   1301          (Mulhub $Rss, $Rtt)>;
   1302 
   1303 def: Pat<(v8i8 (mulhs V8I8:$Rss, V8I8:$Rtt)),
   1304          (A2_vsubub
   1305            (Mulhub $Rss, $Rtt),
   1306            (A2_vaddub (A2_andp V8I8:$Rss, (Asr7 $Rtt)),
   1307                       (A2_andp V8I8:$Rtt, (Asr7 $Rss))))>;
   1308 
   1309 def Mpysh:
   1310   OutPatFrag<(ops node:$Rs, node:$Rt), (M2_vmpy2s_s0 $Rs, $Rt)>;
   1311 def Mpyshh:
   1312   OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (HiReg $Rss), (HiReg $Rtt))>;
   1313 def Mpyshl:
   1314   OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (LoReg $Rss), (LoReg $Rtt))>;
   1315 
   1316 def Mulhsh:
   1317   OutPatFrag<(ops node:$Rss, node:$Rtt),
   1318              (Combinew (A2_combine_hh (HiReg (Mpyshh $Rss, $Rtt)),
   1319                                       (LoReg (Mpyshh $Rss, $Rtt))),
   1320                        (A2_combine_hh (HiReg (Mpyshl $Rss, $Rtt)),
   1321                                       (LoReg (Mpyshl $Rss, $Rtt))))>;
   1322 
   1323 def: Pat<(v4i16 (mulhs V4I16:$Rss, V4I16:$Rtt)), (Mulhsh $Rss, $Rtt)>;
   1324 
   1325 def: Pat<(v4i16 (mulhu V4I16:$Rss, V4I16:$Rtt)),
   1326          (A2_vaddh
   1327            (Mulhsh $Rss, $Rtt),
   1328            (A2_vaddh (A2_andp V4I16:$Rss, (S2_asr_i_vh $Rtt, 15)),
   1329                      (A2_andp V4I16:$Rtt, (S2_asr_i_vh $Rss, 15))))>;
   1330 
   1331 
   1332 def: Pat<(ineg (mul I32:$Rs, u8_0ImmPred:$u8)),
   1333          (M2_mpysin IntRegs:$Rs, imm:$u8)>;
   1334 
   1335 def n8_0ImmPred: PatLeaf<(i32 imm), [{
   1336   int64_t V = N->getSExtValue();
   1337   return -255 <= V && V <= 0;
   1338 }]>;
   1339 
   1340 // Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
   1341 def: Pat<(mul I32:$Rs, n8_0ImmPred:$n8),
   1342          (M2_mpysin I32:$Rs, (NegImm8 imm:$n8))>;
   1343 
   1344 def: Pat<(add Sext64:$Rs, I64:$Rt),
   1345          (A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>;
   1346 
   1347 def: AccRRR_pat<M4_and_and,   And, Su<And>,       I32,  I32,  I32>;
   1348 def: AccRRR_pat<M4_and_or,    And, Su<Or>,        I32,  I32,  I32>;
   1349 def: AccRRR_pat<M4_and_xor,   And, Su<Xor>,       I32,  I32,  I32>;
   1350 def: AccRRR_pat<M4_or_and,    Or,  Su<And>,       I32,  I32,  I32>;
   1351 def: AccRRR_pat<M4_or_or,     Or,  Su<Or>,        I32,  I32,  I32>;
   1352 def: AccRRR_pat<M4_or_xor,    Or,  Su<Xor>,       I32,  I32,  I32>;
   1353 def: AccRRR_pat<M4_xor_and,   Xor, Su<And>,       I32,  I32,  I32>;
   1354 def: AccRRR_pat<M4_xor_or,    Xor, Su<Or>,        I32,  I32,  I32>;
   1355 def: AccRRR_pat<M2_xor_xacc,  Xor, Su<Xor>,       I32,  I32,  I32>;
   1356 def: AccRRR_pat<M4_xor_xacc,  Xor, Su<Xor>,       I64,  I64,  I64>;
   1357 
   1358 // For DAGs like (or (and (not _), _), (shl _, _)), where the "or" combined
   1359 // with one of its operands matches the patterns below and combined with the
   1360 // other operand matches S2_asl_r_r_or, etc., prefer the patterns below.
   1361 let AddedComplexity = 110 in {  // greater than S2_asl_r_r_and/or/xor.
   1362   def: AccRRR_pat<M4_and_andn,  And, Su<Not2<And>>, I32,  I32,  I32>;
   1363   def: AccRRR_pat<M4_or_andn,   Or,  Su<Not2<And>>, I32,  I32,  I32>;
   1364   def: AccRRR_pat<M4_xor_andn,  Xor, Su<Not2<And>>, I32,  I32,  I32>;
   1365 }
   1366 
   1367 // S4_addaddi and S4_subaddi don't have tied operands, so give them
   1368 // a bit of preference.
   1369 let AddedComplexity = 30 in {
   1370   def: Pat<(add I32:$Rs, (Su<Add> I32:$Ru, anyimm:$s6)),
   1371            (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>;
   1372   def: Pat<(add anyimm:$s6, (Su<Add> I32:$Rs, I32:$Ru)),
   1373            (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>;
   1374   def: Pat<(add I32:$Rs, (Su<Sub> anyimm:$s6, I32:$Ru)),
   1375            (S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>;
   1376   def: Pat<(sub (Su<Add> I32:$Rs, anyimm:$s6), I32:$Ru),
   1377            (S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>;
   1378   def: Pat<(add (Su<Sub> I32:$Rs, I32:$Ru), anyimm:$s6),
   1379            (S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>;
   1380 }
   1381 
   1382 def: Pat<(or I32:$Ru, (Su<And> I32:$Rx, anyimm:$s10)),
   1383          (S4_or_andix IntRegs:$Ru, IntRegs:$Rx, imm:$s10)>;
   1384 def: Pat<(or I32:$Rx, (Su<And> I32:$Rs, anyimm:$s10)),
   1385          (S4_or_andi IntRegs:$Rx, IntRegs:$Rs, imm:$s10)>;
   1386 def: Pat<(or I32:$Rx, (Su<Or> I32:$Rs, anyimm:$s10)),
   1387          (S4_or_ori IntRegs:$Rx, IntRegs:$Rs, imm:$s10)>;
   1388 
   1389 
   1390 def: Pat<(i32 (trunc (sra (Su<Mul> Sext64:$Rs, Sext64:$Rt), (i32 32)))),
   1391          (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>;
   1392 def: Pat<(i32 (trunc (srl (Su<Mul> Sext64:$Rs, Sext64:$Rt), (i32 32)))),
   1393          (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>;
   1394 
   1395 def: Pat<(mul (Zext64 I32:$Rs), (Zext64 I32:$Rt)),
   1396          (M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>;
   1397 def: Pat<(mul (Aext64 I32:$Rs), (Aext64 I32:$Rt)),
   1398          (M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>;
   1399 def: Pat<(mul Sext64:$Rs, Sext64:$Rt),
   1400          (M2_dpmpyss_s0 (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>;
   1401 
   1402 def: Pat<(add I64:$Rx, (Su<Mul> Sext64:$Rs, Sext64:$Rt)),
   1403          (M2_dpmpyss_acc_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>;
   1404 def: Pat<(sub I64:$Rx, (Su<Mul> Sext64:$Rs, Sext64:$Rt)),
   1405          (M2_dpmpyss_nac_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>;
   1406 def: Pat<(add I64:$Rx, (Su<Mul> (Aext64 I32:$Rs), (Aext64 I32:$Rt))),
   1407          (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>;
   1408 def: Pat<(add I64:$Rx, (Su<Mul> (Zext64 I32:$Rs), (Zext64 I32:$Rt))),
   1409          (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>;
   1410 def: Pat<(sub I64:$Rx, (Su<Mul> (Aext64 I32:$Rs), (Aext64 I32:$Rt))),
   1411          (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>;
   1412 def: Pat<(sub I64:$Rx, (Su<Mul> (Zext64 I32:$Rs), (Zext64 I32:$Rt))),
   1413          (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>;
   1414 
   1415 // Add halfword.
   1416 def: Pat<(sext_inreg (add I32:$Rt, I32:$Rs), i16),
   1417          (A2_addh_l16_ll I32:$Rt, I32:$Rs)>;
   1418 def: Pat<(sra (add (shl I32:$Rt, (i32 16)), I32:$Rs), (i32 16)),
   1419          (A2_addh_l16_hl I32:$Rt, I32:$Rs)>;
   1420 def: Pat<(shl (add I32:$Rt, I32:$Rs), (i32 16)),
   1421          (A2_addh_h16_ll I32:$Rt, I32:$Rs)>;
   1422 
   1423 // Subtract halfword.
   1424 def: Pat<(sext_inreg (sub I32:$Rt, I32:$Rs), i16),
   1425          (A2_subh_l16_ll I32:$Rt, I32:$Rs)>;
   1426 def: Pat<(sra (add (shl I32:$Rt, (i32 16)), I32:$Rs), (i32 16)),
   1427          (A2_addh_l16_hl I32:$Rt, I32:$Rs)>;
   1428 def: Pat<(shl (sub I32:$Rt, I32:$Rs), (i32 16)),
   1429          (A2_subh_h16_ll I32:$Rt, I32:$Rs)>;
   1430 
   1431 def: Pat<(mul I64:$Rss, I64:$Rtt),
   1432          (Combinew
   1433            (M2_maci (M2_maci (HiReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))),
   1434                              (LoReg $Rss),
   1435                              (HiReg $Rtt)),
   1436                     (LoReg $Rtt),
   1437                     (HiReg $Rss)),
   1438            (i32 (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)))))>;
   1439 
   1440 def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt),
   1441   (A2_addp
   1442     (M2_dpmpyuu_acc_s0
   1443       (S2_lsr_i_p
   1444         (A2_addp
   1445           (M2_dpmpyuu_acc_s0
   1446             (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)), 32),
   1447             (HiReg $Rss),
   1448             (LoReg $Rtt)),
   1449           (A4_combineir 0, (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))),
   1450         32),
   1451       (HiReg $Rss),
   1452       (HiReg $Rtt)),
   1453     (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt)), 32))>;
   1454 
   1455 // Multiply 64-bit unsigned and use upper result.
   1456 def : Pat <(mulhu I64:$Rss, I64:$Rtt), (MulHU $Rss, $Rtt)>;
   1457 
   1458 // Multiply 64-bit signed and use upper result.
   1459 //
   1460 // For two signed 64-bit integers A and B, let A' and B' denote A and B
   1461 // with the sign bit cleared. Then A = -2^63*s(A) + A', where s(A) is the
   1462 // sign bit of A (and identically for B). With this notation, the signed
   1463 // product A*B can be written as:
   1464 //   AB = (-2^63 s(A) + A') * (-2^63 s(B) + B')
   1465 //      = 2^126 s(A)s(B) - 2^63 [s(A)B'+s(B)A'] + A'B'
   1466 //      = 2^126 s(A)s(B) + 2^63 [s(A)B'+s(B)A'] + A'B' - 2*2^63 [s(A)B'+s(B)A']
   1467 //      = (unsigned product AB) - 2^64 [s(A)B'+s(B)A']
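        //
        // In other words, the identity implemented by the pattern below is, as a sketch:
        //   mulhs(A,B) == mulhu(A,B) - ((A < 0 ? B' : 0) + (B < 0 ? A' : 0))
        // where X' is X with the sign bit cleared (ClearSign), and the A < 0 / B < 0
        // tests are materialized as arithmetic-shift-right-by-63 masks.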
   1468 
   1469 // Clear the sign bit in a 64-bit register.
   1470 def ClearSign : OutPatFrag<(ops node:$Rss),
   1471   (Combinew (S2_clrbit_i (HiReg $Rss), 31), (i32 (LoReg $Rss)))>;
   1472 
   1473 def : Pat <(mulhs I64:$Rss, I64:$Rtt),
   1474   (A2_subp
   1475     (MulHU $Rss, $Rtt),
   1476     (A2_addp
   1477       (A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)),
   1478       (A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>;
   1479 
   1480 // Prefer these instructions over M2_macsip/M2_macsin: the macsi* instructions
   1481 // will put the immediate addend into a register, while these instructions will
   1482 // use it directly. Such a construct does not appear in the middle of a gep,
   1483 // where M2_macsip would be preferable.
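        // A sketch of the shape being matched here (values illustrative):
        //   r = x*3 + 100;   // -> M4_mpyri_addi(#100, x, #3), #100 stays an immediate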
   1484 let AddedComplexity = 20 in {
   1485   def: Pat<(add (Su<Mul> I32:$Rs, u6_0ImmPred:$U6), anyimm:$u6),
   1486            (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>;
   1487   def: Pat<(add (Su<Mul> I32:$Rs, I32:$Rt), anyimm:$u6),
   1488            (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>;
   1489 }
   1490 
   1491 // Keep these instructions less preferable to M2_macsip/M2_macsin.
   1492 def: Pat<(add I32:$Ru, (Su<Mul> I32:$Rs, u6_2ImmPred:$u6_2)),
   1493          (M4_mpyri_addr_u2 IntRegs:$Ru, imm:$u6_2, IntRegs:$Rs)>;
   1494 def: Pat<(add I32:$Ru, (Su<Mul> I32:$Rs, anyimm:$u6)),
   1495          (M4_mpyri_addr IntRegs:$Ru, IntRegs:$Rs, imm:$u6)>;
   1496 def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)),
   1497          (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>;
   1498 
   1499 
   1500 let Predicates = [HasV5] in {
   1501   def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
   1502            (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
   1503   def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
   1504            (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
   1505   def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
   1506            (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
   1507 }
   1508 
   1509 
   1510 def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
   1511          (PS_vmulw V2I32:$Rs, V2I32:$Rt)>;
   1512 def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)),
   1513          (PS_vmulw_acc V2I32:$Rx, V2I32:$Rs, V2I32:$Rt)>;
   1514 
   1515 // Add/subtract two v4i8: Hexagon has no instructions for these, so use the
   1516 // 64-bit (v8i8) add/subtract and keep only the low half of the result.
   1517 def: Pat<(add V4I8:$Rs, V4I8:$Rt),
   1518          (LoReg (A2_vaddub (ToZext64 $Rs), (ToZext64 $Rt)))>;
   1519 def: Pat<(sub V4I8:$Rs, V4I8:$Rt),
   1520          (LoReg (A2_vsubub (ToZext64 $Rs), (ToZext64 $Rt)))>;
   1521 
   1522 // Use M2_vmpy2s_s0 for half-word vector multiply. It multiplies two
   1523 // half-words and saturates the result to a 32-bit value, but without
   1524 // scaling (the _s0 form) the saturation never actually happens.
   1525 def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
   1526          (LoReg (S2_vtrunewh (A2_combineii 0, 0),
   1527                              (M2_vmpy2s_s0 V2I16:$Rs, V2I16:$Rt)))>;
   1528 def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
   1529          (S2_vtrunewh (M2_vmpy2s_s0 (HiReg $Rs), (HiReg $Rt)),
   1530                       (M2_vmpy2s_s0 (LoReg $Rs), (LoReg $Rt)))>;
   1531 
   1532 // Multiplies two v4i8 vectors.
   1533 def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
   1534          (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>,
   1535      Requires<[HasV5]>;
   1536 
   1537 // Multiplies two v8i8 vectors.
   1538 def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
   1539          (Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))),
   1540                    (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>,
   1541      Requires<[HasV5]>;
   1542 
   1543 
   1544 // --(10) Bit ------------------------------------------------------------
   1545 //
   1546 
   1547 // Count leading zeros.
   1548 def: Pat<(ctlz I32:$Rs),                      (S2_cl0 I32:$Rs)>;
   1549 def: Pat<(i32 (trunc (ctlz I64:$Rss))),       (S2_cl0p I64:$Rss)>;
   1550 
   1551 // Count trailing zeros.
   1552 def: Pat<(cttz I32:$Rs),                      (S2_ct0 I32:$Rs)>;
   1553 def: Pat<(i32 (trunc (cttz I64:$Rss))),       (S2_ct0p I64:$Rss)>;
   1554 
   1555 // Count leading ones.
   1556 def: Pat<(ctlz (not I32:$Rs)),                (S2_cl1 I32:$Rs)>;
   1557 def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
   1558 
   1559 // Count trailing ones.
   1560 def: Pat<(cttz (not I32:$Rs)),                (S2_ct1 I32:$Rs)>;
   1561 def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
   1562 
   1563 // Count-leading/trailing patterns whose i64 results require zero-extension.
   1564 def: Pat<(i64 (ctlz I64:$Rss)),               (ToZext64 (S2_cl0p I64:$Rss))>;
   1565 def: Pat<(i64 (cttz I64:$Rss)),               (ToZext64 (S2_ct0p I64:$Rss))>;
   1566 def: Pat<(i64 (ctlz (not I64:$Rss))),         (ToZext64 (S2_cl1p I64:$Rss))>;
   1567 def: Pat<(i64 (cttz (not I64:$Rss))),         (ToZext64 (S2_ct1p I64:$Rss))>;
   1568 
   1569 def: Pat<(i64 (ctpop I64:$Rss)),  (ToZext64 (S5_popcountp I64:$Rss))>;
   1570 def: Pat<(i32 (ctpop I32:$Rs)),   (S5_popcountp (A4_combineir 0, I32:$Rs))>;
   1571 
   1572 def: Pat<(bitreverse I32:$Rs),    (S2_brev I32:$Rs)>;
   1573 def: Pat<(bitreverse I64:$Rss),   (S2_brevp I64:$Rss)>;
   1574 
   1575 let AddedComplexity = 20 in { // Complexity greater than and/or/xor
   1576   def: Pat<(and I32:$Rs, IsNPow2_32:$V),
   1577            (S2_clrbit_i IntRegs:$Rs, (LogN2_32 $V))>;
   1578   def: Pat<(or I32:$Rs, IsPow2_32:$V),
   1579            (S2_setbit_i IntRegs:$Rs, (Log2_32 $V))>;
   1580   def: Pat<(xor I32:$Rs, IsPow2_32:$V),
   1581            (S2_togglebit_i IntRegs:$Rs, (Log2_32 $V))>;
   1582 
   1583   def: Pat<(and I32:$Rs, (not (shl 1, I32:$Rt))),
   1584            (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>;
   1585   def: Pat<(or I32:$Rs, (shl 1, I32:$Rt)),
   1586            (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>;
   1587   def: Pat<(xor I32:$Rs, (shl 1, I32:$Rt)),
   1588            (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>;
   1589 }
   1590 
   1591 // Clr/set/toggle bit for 64-bit values with immediate bit index.
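        // A worked example (illustrative): x & ~(1ULL << 37) touches only the high
        // word, so it becomes S2_clrbit_i on the high register with bit index
        // 37 - 32 = 5 (the UDEC32 adjustment below); the low word is passed through.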
   1592 let AddedComplexity = 20 in { // Complexity greater than and/or/xor
   1593   def: Pat<(and I64:$Rss, IsNPow2_64L:$V),
   1594            (Combinew (i32 (HiReg $Rss)),
   1595                      (S2_clrbit_i (LoReg $Rss), (LogN2_64 $V)))>;
   1596   def: Pat<(and I64:$Rss, IsNPow2_64H:$V),
   1597            (Combinew (S2_clrbit_i (HiReg $Rss), (UDEC32 (i32 (LogN2_64 $V)))),
   1598                      (i32 (LoReg $Rss)))>;
   1599 
   1600   def: Pat<(or I64:$Rss, IsPow2_64L:$V),
   1601            (Combinew (i32 (HiReg $Rss)),
   1602                      (S2_setbit_i (LoReg $Rss), (Log2_64 $V)))>;
   1603   def: Pat<(or I64:$Rss, IsPow2_64H:$V),
   1604            (Combinew (S2_setbit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))),
   1605                      (i32 (LoReg $Rss)))>;
   1606 
   1607   def: Pat<(xor I64:$Rss, IsPow2_64L:$V),
   1608            (Combinew (i32 (HiReg $Rss)),
   1609                      (S2_togglebit_i (LoReg $Rss), (Log2_64 $V)))>;
   1610   def: Pat<(xor I64:$Rss, IsPow2_64H:$V),
   1611            (Combinew (S2_togglebit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))),
   1612                      (i32 (LoReg $Rss)))>;
   1613 }
   1614 
   1615 let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
   1616   def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
   1617            (S2_tstbit_i IntRegs:$Rs, imm:$u5)>;
   1618   def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)),
   1619            (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>;
   1620   def: Pat<(i1 (trunc I32:$Rs)),
   1621            (S2_tstbit_i IntRegs:$Rs, 0)>;
   1622   def: Pat<(i1 (trunc I64:$Rs)),
   1623            (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>;
   1624 }
   1625 
   1626 let AddedComplexity = 20 in { // Complexity greater than compare reg-imm.
   1627   def: Pat<(i1 (seteq (and I32:$Rs, u6_0ImmPred:$u6), 0)),
   1628            (C2_bitsclri IntRegs:$Rs, imm:$u6)>;
   1629   def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), 0)),
   1630            (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>;
   1631 }
   1632 
   1633 let AddedComplexity = 10 in   // Complexity greater than compare reg-reg.
   1634 def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), IntRegs:$Rt)),
   1635          (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>;
   1636 
   1637 def SDTTestBit:
   1638   SDTypeProfile<1, 2, [SDTCisVT<0, i1>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
   1639 def HexagonTSTBIT: SDNode<"HexagonISD::TSTBIT", SDTTestBit>;
   1640 
   1641 def: Pat<(HexagonTSTBIT I32:$Rs, u5_0ImmPred:$u5),
   1642          (S2_tstbit_i I32:$Rs, imm:$u5)>;
   1643 def: Pat<(HexagonTSTBIT I32:$Rs, I32:$Rt),
   1644          (S2_tstbit_r I32:$Rs, I32:$Rt)>;
   1645 
   1646 let AddedComplexity = 20 in {   // Complexity greater than cmp reg-imm.
   1647   def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
   1648            (S4_ntstbit_i I32:$Rs, imm:$u5)>;
   1649   def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)),
   1650            (S4_ntstbit_r I32:$Rs, I32:$Rt)>;
   1651 }
   1652 
   1653 // Add extra complexity to prefer these instructions over bitsset/bitsclr.
   1654 // The reason is that tstbit/ntstbit can be folded into a compound instruction:
   1655 //   if ([!]tstbit(...)) jump ...
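        // For example (a sketch, names illustrative), C code such as
        //   if (x & (1u << 5)) { ... }
        // can then select a tstbit whose predicate feeds the conditional jump.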
   1656 let AddedComplexity = 100 in
   1657 def: Pat<(i1 (setne (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))),
   1658          (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>;
   1659 
   1660 let AddedComplexity = 100 in
   1661 def: Pat<(i1 (seteq (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))),
   1662          (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>;
   1663 
   1664 // Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
   1665 // represented as a compare against "value & 0xFF", which is an exact match
   1666 // for cmpb (same for cmph). The patterns below do not contain any additional
   1667 // complexity that would make them preferable, and if they were actually used
   1668 // instead of cmpb/cmph, they would result in a compare against a register
   1669 // that is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF).
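        // For example, C code like "(x & 0xFF) == 17" already appears in the DAG as
        // a compare of (and $Rs, 255) against 17, which cmpb matches directly.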
   1670 def: Pat<(i1 (setne (and I32:$Rs, u6_0ImmPred:$u6), 0)),
   1671          (C4_nbitsclri I32:$Rs, imm:$u6)>;
   1672 def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)),
   1673          (C4_nbitsclr I32:$Rs, I32:$Rt)>;
   1674 def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
   1675          (C4_nbitsset I32:$Rs, I32:$Rt)>;
   1676 
   1677 // Special patterns to address certain cases where the "top-down" matching
   1678 // algorithm would cause suboptimal selection.
   1679 
   1680 let AddedComplexity = 100 in {
   1681   // Avoid A4_rcmp[n]eqi in these cases:
   1682   def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))),
   1683            (I1toI32 (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt))>;
   1684   def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))),
   1685            (I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>;
   1686 }
   1687 
   1688 // --(11) PIC ------------------------------------------------------------
   1689 //
   1690 
   1691 def SDT_HexagonAtGot
   1692   : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
   1693 def SDT_HexagonAtPcrel
   1694   : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
   1695 
   1696 // AT_GOT address-of-GOT, address-of-global, offset-in-global
   1697 def HexagonAtGot       : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>;
   1698 // AT_PCREL address-of-global
   1699 def HexagonAtPcrel     : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>;
   1700 
   1701 def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)),
   1702          (L2_loadri_io I32:$got, imm:$addr)>;
   1703 def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off),
   1704          (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>;
   1705 def: Pat<(HexagonAtPcrel I32:$addr),
   1706          (C4_addipc imm:$addr)>;
   1707 
   1708 // The HVX load patterns also match AT_PCREL directly. Make sure that
   1709 // if the selection of this opcode changes, it's updated in all places.
   1710 
   1711 
   1712 // --(12) Load -----------------------------------------------------------
   1713 //
   1714 
   1715 def extloadv2i8: PatFrag<(ops node:$ptr), (extload node:$ptr), [{
   1716   return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8;
   1717 }]>;
   1718 def extloadv4i8: PatFrag<(ops node:$ptr), (extload node:$ptr), [{
   1719   return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4i8;
   1720 }]>;
   1721 
   1722 def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
   1723   return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8;
   1724 }]>;
   1725 def zextloadv4i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
   1726   return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4i8;
   1727 }]>;
   1728 
   1729 def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
   1730   return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8;
   1731 }]>;
   1732 def sextloadv4i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
   1733   return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4i8;
   1734 }]>;
   1735 
   1736 // Patterns to select load-indexed: Rs + Off.
   1737 // - frameindex [+ imm],
   1738 multiclass Loadxfi_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
   1739                        InstHexagon MI> {
   1740   def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
   1741            (VT (MI AddrFI:$fi, imm:$Off))>;
   1742   def: Pat<(VT (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))),
   1743            (VT (MI AddrFI:$fi, imm:$Off))>;
   1744   def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>;
   1745 }
   1746 
   1747 // Patterns to select load-indexed: Rs + Off.
   1748 // - base reg [+ imm]
   1749 multiclass Loadxgi_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
   1750                        InstHexagon MI> {
   1751   def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))),
   1752            (VT (MI IntRegs:$Rs, imm:$Off))>;
   1753   def: Pat<(VT (Load (IsOrAdd I32:$Rs, ImmPred:$Off))),
   1754            (VT (MI IntRegs:$Rs, imm:$Off))>;
   1755   def: Pat<(VT (Load I32:$Rs)), (VT (MI IntRegs:$Rs, 0))>;
   1756 }
   1757 
   1758 // Patterns to select load-indexed: Rs + Off. Combines Loadxfi + Loadxgi.
   1759 multiclass Loadxi_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
   1760                       InstHexagon MI> {
   1761   defm: Loadxfi_pat<Load, VT, ImmPred, MI>;
   1762   defm: Loadxgi_pat<Load, VT, ImmPred, MI>;
   1763 }
   1764 
   1765 // Patterns to select load reg indexed: Rs + Off with a value modifier.
   1766 // - frameindex [+ imm]
   1767 multiclass Loadxfim_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
   1768                         PatLeaf ImmPred, InstHexagon MI> {
   1769   def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
   1770            (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>;
   1771   def: Pat<(VT (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))),
   1772            (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>;
   1773   def: Pat<(VT (Load AddrFI:$fi)), (VT (ValueMod (MI AddrFI:$fi, 0)))>;
   1774 }
   1775 
   1776 // Patterns to select load reg indexed: Rs + Off with a value modifier.
   1777 // - base reg [+ imm]
   1778 multiclass Loadxgim_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
   1779                         PatLeaf ImmPred, InstHexagon MI> {
   1780   def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))),
   1781            (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>;
   1782   def: Pat<(VT (Load (IsOrAdd I32:$Rs, ImmPred:$Off))),
   1783            (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>;
   1784   def: Pat<(VT (Load I32:$Rs)), (VT (ValueMod (MI IntRegs:$Rs, 0)))>;
   1785 }
   1786 
   1787 // Patterns to select load reg indexed: Rs + Off with a value modifier.
   1788 // Combines Loadxfim + Loadxgim.
   1789 multiclass Loadxim_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
   1790                        PatLeaf ImmPred, InstHexagon MI> {
   1791   defm: Loadxfim_pat<Load, VT, ValueMod, ImmPred, MI>;
   1792   defm: Loadxgim_pat<Load, VT, ValueMod, ImmPred, MI>;
   1793 }
   1794 
   1795 // Pattern to select load reg reg-indexed: Rs + Rt<<u2.
   1796 class Loadxr_shl_pat<PatFrag Load, ValueType VT, InstHexagon MI>
   1797   : Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))),
   1798         (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;
   1799 
   1800 // Pattern to select load reg reg-indexed: Rs + Rt<<0.
   1801 class Loadxr_add_pat<PatFrag Load, ValueType VT, InstHexagon MI>
   1802   : Pat<(VT (Load (add I32:$Rs, I32:$Rt))),
   1803         (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>;
   1804 
   1805 // Pattern to select load reg reg-indexed: Rs + Rt<<u2 with value modifier.
   1806 class Loadxrm_shl_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
   1807                       InstHexagon MI>
   1808   : Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))),
   1809         (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2)))>;
   1810 
   1811 // Pattern to select load reg reg-indexed: Rs + Rt<<0 with value modifier.
   1812 class Loadxrm_add_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
   1813                       InstHexagon MI>
   1814   : Pat<(VT (Load (add I32:$Rs, I32:$Rt))),
   1815         (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, 0)))>;
   1816 
   1817 // Pattern to select load long-offset reg-indexed: Addr + Rt<<u2.
   1818 // Don't match for u2==0, instead use reg+imm for those cases.
   1819 class Loadxu_pat<PatFrag Load, ValueType VT, PatFrag ImmPred, InstHexagon MI>
   1820   : Pat<(VT (Load (add (shl IntRegs:$Rt, u2_0ImmPred:$u2), ImmPred:$Addr))),
   1821         (VT (MI IntRegs:$Rt, imm:$u2, ImmPred:$Addr))>;
   1822 
   1823 class Loadxum_pat<PatFrag Load, ValueType VT, PatFrag ImmPred, PatFrag ValueMod,
   1824                   InstHexagon MI>
   1825   : Pat<(VT (Load (add (shl IntRegs:$Rt, u2_0ImmPred:$u2), ImmPred:$Addr))),
   1826         (VT (ValueMod (MI IntRegs:$Rt, imm:$u2, ImmPred:$Addr)))>;
   1827 
   1828 // Pattern to select load absolute.
   1829 class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI>
   1830   : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>;
   1831 
   1832 // Pattern to select load absolute with value modifier.
   1833 class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod,
   1834                  InstHexagon MI>
   1835   : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>;
   1836 
   1837 
   1838 let AddedComplexity = 20 in {
   1839   defm: Loadxi_pat<extloadi1,       i32,   anyimm0, L2_loadrub_io>;
   1840   defm: Loadxi_pat<extloadi8,       i32,   anyimm0, L2_loadrub_io>;
   1841   defm: Loadxi_pat<extloadi16,      i32,   anyimm1, L2_loadruh_io>;
   1842   defm: Loadxi_pat<extloadv2i8,     v2i16, anyimm1, L2_loadbzw2_io>;
   1843   defm: Loadxi_pat<extloadv4i8,     v4i16, anyimm2, L2_loadbzw4_io>;
   1844   defm: Loadxi_pat<sextloadi8,      i32,   anyimm0, L2_loadrb_io>;
   1845   defm: Loadxi_pat<sextloadi16,     i32,   anyimm1, L2_loadrh_io>;
   1846   defm: Loadxi_pat<sextloadv2i8,    v2i16, anyimm1, L2_loadbsw2_io>;
   1847   defm: Loadxi_pat<sextloadv4i8,    v4i16, anyimm2, L2_loadbsw4_io>;
   1848   defm: Loadxi_pat<zextloadi1,      i32,   anyimm0, L2_loadrub_io>;
   1849   defm: Loadxi_pat<zextloadi8,      i32,   anyimm0, L2_loadrub_io>;
   1850   defm: Loadxi_pat<zextloadi16,     i32,   anyimm1, L2_loadruh_io>;
   1851   defm: Loadxi_pat<zextloadv2i8,    v2i16, anyimm1, L2_loadbzw2_io>;
   1852   defm: Loadxi_pat<zextloadv4i8,    v4i16, anyimm2, L2_loadbzw4_io>;
   1853   defm: Loadxi_pat<load,            i32,   anyimm2, L2_loadri_io>;
   1854   defm: Loadxi_pat<load,            v2i16, anyimm2, L2_loadri_io>;
   1855   defm: Loadxi_pat<load,            v4i8,  anyimm2, L2_loadri_io>;
   1856   defm: Loadxi_pat<load,            i64,   anyimm3, L2_loadrd_io>;
   1857   defm: Loadxi_pat<load,            v2i32, anyimm3, L2_loadrd_io>;
   1858   defm: Loadxi_pat<load,            v4i16, anyimm3, L2_loadrd_io>;
   1859   defm: Loadxi_pat<load,            v8i8,  anyimm3, L2_loadrd_io>;
   1860   defm: Loadxi_pat<load,            f32,   anyimm2, L2_loadri_io>;
   1861   defm: Loadxi_pat<load,            f64,   anyimm3, L2_loadrd_io>;
   1862   // No sextloadi1.
   1863 
   1864   defm: Loadxi_pat<atomic_load_8 ,  i32, anyimm0, L2_loadrub_io>;
   1865   defm: Loadxi_pat<atomic_load_16,  i32, anyimm1, L2_loadruh_io>;
   1866   defm: Loadxi_pat<atomic_load_32,  i32, anyimm2, L2_loadri_io>;
   1867   defm: Loadxi_pat<atomic_load_64,  i64, anyimm3, L2_loadrd_io>;
   1868 }
   1869 
   1870 let AddedComplexity = 30 in {
   1871   defm: Loadxim_pat<extloadi1,    i64, ToZext64, anyimm0, L2_loadrub_io>;
   1872   defm: Loadxim_pat<extloadi8,    i64, ToZext64, anyimm0, L2_loadrub_io>;
   1873   defm: Loadxim_pat<extloadi16,   i64, ToZext64, anyimm1, L2_loadruh_io>;
   1874   defm: Loadxim_pat<extloadi32,   i64, ToZext64, anyimm2, L2_loadri_io>;
   1875   defm: Loadxim_pat<zextloadi1,   i64, ToZext64, anyimm0, L2_loadrub_io>;
   1876   defm: Loadxim_pat<zextloadi8,   i64, ToZext64, anyimm0, L2_loadrub_io>;
   1877   defm: Loadxim_pat<zextloadi16,  i64, ToZext64, anyimm1, L2_loadruh_io>;
   1878   defm: Loadxim_pat<zextloadi32,  i64, ToZext64, anyimm2, L2_loadri_io>;
   1879   defm: Loadxim_pat<sextloadi8,   i64, ToSext64, anyimm0, L2_loadrb_io>;
   1880   defm: Loadxim_pat<sextloadi16,  i64, ToSext64, anyimm1, L2_loadrh_io>;
   1881   defm: Loadxim_pat<sextloadi32,  i64, ToSext64, anyimm2, L2_loadri_io>;
   1882 }
   1883 
   1884 let AddedComplexity  = 60 in {
   1885   def: Loadxu_pat<extloadi8,    i32,   anyimm0, L4_loadrub_ur>;
   1886   def: Loadxu_pat<extloadi16,   i32,   anyimm1, L4_loadruh_ur>;
   1887   def: Loadxu_pat<extloadv2i8,  v2i16, anyimm1, L4_loadbzw2_ur>;
   1888   def: Loadxu_pat<extloadv4i8,  v4i16, anyimm2, L4_loadbzw4_ur>;
   1889   def: Loadxu_pat<sextloadi8,   i32,   anyimm0, L4_loadrb_ur>;
   1890   def: Loadxu_pat<sextloadi16,  i32,   anyimm1, L4_loadrh_ur>;
   1891   def: Loadxu_pat<sextloadv2i8, v2i16, anyimm1, L4_loadbsw2_ur>;
   1892   def: Loadxu_pat<sextloadv4i8, v4i16, anyimm2, L4_loadbsw4_ur>;
   1893   def: Loadxu_pat<zextloadi8,   i32,   anyimm0, L4_loadrub_ur>;
   1894   def: Loadxu_pat<zextloadi16,  i32,   anyimm1, L4_loadruh_ur>;
   1895   def: Loadxu_pat<zextloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>;
   1896   def: Loadxu_pat<zextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>;
   1897   def: Loadxu_pat<load,         i32,   anyimm2, L4_loadri_ur>;
   1898   def: Loadxu_pat<load,         v2i16, anyimm2, L4_loadri_ur>;
   1899   def: Loadxu_pat<load,         v4i8,  anyimm2, L4_loadri_ur>;
   1900   def: Loadxu_pat<load,         i64,   anyimm3, L4_loadrd_ur>;
   1901   def: Loadxu_pat<load,         v2i32, anyimm3, L4_loadrd_ur>;
   1902   def: Loadxu_pat<load,         v4i16, anyimm3, L4_loadrd_ur>;
   1903   def: Loadxu_pat<load,         v8i8,  anyimm3, L4_loadrd_ur>;
   1904   def: Loadxu_pat<load,         f32,   anyimm2, L4_loadri_ur>;
   1905   def: Loadxu_pat<load,         f64,   anyimm3, L4_loadrd_ur>;
   1906 
   1907   def: Loadxum_pat<sextloadi8,  i64, anyimm0, ToSext64, L4_loadrb_ur>;
   1908   def: Loadxum_pat<zextloadi8,  i64, anyimm0, ToZext64, L4_loadrub_ur>;
   1909   def: Loadxum_pat<extloadi8,   i64, anyimm0, ToZext64, L4_loadrub_ur>;
   1910   def: Loadxum_pat<sextloadi16, i64, anyimm1, ToSext64, L4_loadrh_ur>;
   1911   def: Loadxum_pat<zextloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>;
   1912   def: Loadxum_pat<extloadi16,  i64, anyimm1, ToZext64, L4_loadruh_ur>;
   1913   def: Loadxum_pat<sextloadi32, i64, anyimm2, ToSext64, L4_loadri_ur>;
   1914   def: Loadxum_pat<zextloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>;
   1915   def: Loadxum_pat<extloadi32,  i64, anyimm2, ToZext64, L4_loadri_ur>;
   1916 }
   1917 
   1918 let AddedComplexity = 40 in {
   1919   def: Loadxr_shl_pat<extloadi8,     i32,   L4_loadrub_rr>;
   1920   def: Loadxr_shl_pat<zextloadi8,    i32,   L4_loadrub_rr>;
   1921   def: Loadxr_shl_pat<sextloadi8,    i32,   L4_loadrb_rr>;
   1922   def: Loadxr_shl_pat<extloadi16,    i32,   L4_loadruh_rr>;
   1923   def: Loadxr_shl_pat<zextloadi16,   i32,   L4_loadruh_rr>;
   1924   def: Loadxr_shl_pat<sextloadi16,   i32,   L4_loadrh_rr>;
   1925   def: Loadxr_shl_pat<load,          i32,   L4_loadri_rr>;
   1926   def: Loadxr_shl_pat<load,          v2i16, L4_loadri_rr>;
   1927   def: Loadxr_shl_pat<load,          v4i8,  L4_loadri_rr>;
   1928   def: Loadxr_shl_pat<load,          i64,   L4_loadrd_rr>;
   1929   def: Loadxr_shl_pat<load,          v2i32, L4_loadrd_rr>;
   1930   def: Loadxr_shl_pat<load,          v4i16, L4_loadrd_rr>;
   1931   def: Loadxr_shl_pat<load,          v8i8,  L4_loadrd_rr>;
   1932   def: Loadxr_shl_pat<load,          f32,   L4_loadri_rr>;
   1933   def: Loadxr_shl_pat<load,          f64,   L4_loadrd_rr>;
   1934 }
   1935 
   1936 let AddedComplexity = 20 in {
   1937   def: Loadxr_add_pat<extloadi8,     i32,   L4_loadrub_rr>;
   1938   def: Loadxr_add_pat<zextloadi8,    i32,   L4_loadrub_rr>;
   1939   def: Loadxr_add_pat<sextloadi8,    i32,   L4_loadrb_rr>;
   1940   def: Loadxr_add_pat<extloadi16,    i32,   L4_loadruh_rr>;
   1941   def: Loadxr_add_pat<zextloadi16,   i32,   L4_loadruh_rr>;
   1942   def: Loadxr_add_pat<sextloadi16,   i32,   L4_loadrh_rr>;
   1943   def: Loadxr_add_pat<load,          i32,   L4_loadri_rr>;
   1944   def: Loadxr_add_pat<load,          v2i16, L4_loadri_rr>;
   1945   def: Loadxr_add_pat<load,          v4i8,  L4_loadri_rr>;
   1946   def: Loadxr_add_pat<load,          i64,   L4_loadrd_rr>;
   1947   def: Loadxr_add_pat<load,          v2i32, L4_loadrd_rr>;
   1948   def: Loadxr_add_pat<load,          v4i16, L4_loadrd_rr>;
   1949   def: Loadxr_add_pat<load,          v8i8,  L4_loadrd_rr>;
   1950   def: Loadxr_add_pat<load,          f32,   L4_loadri_rr>;
   1951   def: Loadxr_add_pat<load,          f64,   L4_loadrd_rr>;
   1952 }
   1953 
   1954 let AddedComplexity = 40 in {
   1955   def: Loadxrm_shl_pat<extloadi8,    i64, ToZext64, L4_loadrub_rr>;
   1956   def: Loadxrm_shl_pat<zextloadi8,   i64, ToZext64, L4_loadrub_rr>;
   1957   def: Loadxrm_shl_pat<sextloadi8,   i64, ToSext64, L4_loadrb_rr>;
   1958   def: Loadxrm_shl_pat<extloadi16,   i64, ToZext64, L4_loadruh_rr>;
   1959   def: Loadxrm_shl_pat<zextloadi16,  i64, ToZext64, L4_loadruh_rr>;
   1960   def: Loadxrm_shl_pat<sextloadi16,  i64, ToSext64, L4_loadrh_rr>;
   1961   def: Loadxrm_shl_pat<extloadi32,   i64, ToZext64, L4_loadri_rr>;
   1962   def: Loadxrm_shl_pat<zextloadi32,  i64, ToZext64, L4_loadri_rr>;
   1963   def: Loadxrm_shl_pat<sextloadi32,  i64, ToSext64, L4_loadri_rr>;
   1964 }
   1965 
   1966 let AddedComplexity = 20 in {
   1967   def: Loadxrm_add_pat<extloadi8,    i64, ToZext64, L4_loadrub_rr>;
   1968   def: Loadxrm_add_pat<zextloadi8,   i64, ToZext64, L4_loadrub_rr>;
   1969   def: Loadxrm_add_pat<sextloadi8,   i64, ToSext64, L4_loadrb_rr>;
   1970   def: Loadxrm_add_pat<extloadi16,   i64, ToZext64, L4_loadruh_rr>;
   1971   def: Loadxrm_add_pat<zextloadi16,  i64, ToZext64, L4_loadruh_rr>;
   1972   def: Loadxrm_add_pat<sextloadi16,  i64, ToSext64, L4_loadrh_rr>;
   1973   def: Loadxrm_add_pat<extloadi32,   i64, ToZext64, L4_loadri_rr>;
   1974   def: Loadxrm_add_pat<zextloadi32,  i64, ToZext64, L4_loadri_rr>;
   1975   def: Loadxrm_add_pat<sextloadi32,  i64, ToSext64, L4_loadri_rr>;
   1976 }
   1977 
   1978 // Absolute address
   1979 
   1980 let AddedComplexity  = 60 in {
   1981   def: Loada_pat<zextloadi1,      i32,   anyimm0, PS_loadrubabs>;
   1982   def: Loada_pat<sextloadi8,      i32,   anyimm0, PS_loadrbabs>;
   1983   def: Loada_pat<extloadi8,       i32,   anyimm0, PS_loadrubabs>;
   1984   def: Loada_pat<zextloadi8,      i32,   anyimm0, PS_loadrubabs>;
   1985   def: Loada_pat<sextloadi16,     i32,   anyimm1, PS_loadrhabs>;
   1986   def: Loada_pat<extloadi16,      i32,   anyimm1, PS_loadruhabs>;
   1987   def: Loada_pat<zextloadi16,     i32,   anyimm1, PS_loadruhabs>;
   1988   def: Loada_pat<load,            i32,   anyimm2, PS_loadriabs>;
   1989   def: Loada_pat<load,            v2i16, anyimm2, PS_loadriabs>;
   1990   def: Loada_pat<load,            v4i8,  anyimm2, PS_loadriabs>;
   1991   def: Loada_pat<load,            i64,   anyimm3, PS_loadrdabs>;
   1992   def: Loada_pat<load,            v2i32, anyimm3, PS_loadrdabs>;
   1993   def: Loada_pat<load,            v4i16, anyimm3, PS_loadrdabs>;
   1994   def: Loada_pat<load,            v8i8,  anyimm3, PS_loadrdabs>;
   1995   def: Loada_pat<load,            f32,   anyimm2, PS_loadriabs>;
   1996   def: Loada_pat<load,            f64,   anyimm3, PS_loadrdabs>;
   1997 
   1998   def: Loada_pat<atomic_load_8,   i32, anyimm0, PS_loadrubabs>;
   1999   def: Loada_pat<atomic_load_16,  i32, anyimm1, PS_loadruhabs>;
   2000   def: Loada_pat<atomic_load_32,  i32, anyimm2, PS_loadriabs>;
   2001   def: Loada_pat<atomic_load_64,  i64, anyimm3, PS_loadrdabs>;
   2002 }
   2003 
   2004 let AddedComplexity  = 30 in {
   2005   def: Loadam_pat<extloadi8,      i64, anyimm0, ToZext64, PS_loadrubabs>;
   2006   def: Loadam_pat<sextloadi8,     i64, anyimm0, ToSext64, PS_loadrbabs>;
   2007   def: Loadam_pat<zextloadi8,     i64, anyimm0, ToZext64, PS_loadrubabs>;
   2008   def: Loadam_pat<extloadi16,     i64, anyimm1, ToZext64, PS_loadruhabs>;
   2009   def: Loadam_pat<sextloadi16,    i64, anyimm1, ToSext64, PS_loadrhabs>;
   2010   def: Loadam_pat<zextloadi16,    i64, anyimm1, ToZext64, PS_loadruhabs>;
   2011   def: Loadam_pat<extloadi32,     i64, anyimm2, ToZext64, PS_loadriabs>;
   2012   def: Loadam_pat<sextloadi32,    i64, anyimm2, ToSext64, PS_loadriabs>;
   2013   def: Loadam_pat<zextloadi32,    i64, anyimm2, ToZext64, PS_loadriabs>;
   2014 
   2015   def: Loadam_pat<load,           i1,  anyimm0, I32toI1,  PS_loadrubabs>;
   2016   def: Loadam_pat<zextloadi1,     i64, anyimm0, ToZext64, PS_loadrubabs>;
   2017 }
   2018 
   2019 // GP-relative address
   2020 
   2021 let AddedComplexity  = 100 in {
   2022   def: Loada_pat<extloadi1,       i32,   addrgp,  L2_loadrubgp>;
   2023   def: Loada_pat<zextloadi1,      i32,   addrgp,  L2_loadrubgp>;
   2024   def: Loada_pat<extloadi8,       i32,   addrgp,  L2_loadrubgp>;
   2025   def: Loada_pat<sextloadi8,      i32,   addrgp,  L2_loadrbgp>;
   2026   def: Loada_pat<zextloadi8,      i32,   addrgp,  L2_loadrubgp>;
   2027   def: Loada_pat<extloadi16,      i32,   addrgp,  L2_loadruhgp>;
   2028   def: Loada_pat<sextloadi16,     i32,   addrgp,  L2_loadrhgp>;
   2029   def: Loada_pat<zextloadi16,     i32,   addrgp,  L2_loadruhgp>;
   2030   def: Loada_pat<load,            i32,   addrgp,  L2_loadrigp>;
   2031   def: Loada_pat<load,            v2i16, addrgp,  L2_loadrigp>;
   2032   def: Loada_pat<load,            v4i8,  addrgp,  L2_loadrigp>;
   2033   def: Loada_pat<load,            i64,   addrgp,  L2_loadrdgp>;
   2034   def: Loada_pat<load,            v2i32, addrgp,  L2_loadrdgp>;
   2035   def: Loada_pat<load,            v4i16, addrgp,  L2_loadrdgp>;
   2036   def: Loada_pat<load,            v8i8,  addrgp,  L2_loadrdgp>;
   2037   def: Loada_pat<load,            f32,   addrgp,  L2_loadrigp>;
   2038   def: Loada_pat<load,            f64,   addrgp,  L2_loadrdgp>;
   2039 
   2040   def: Loada_pat<atomic_load_8,   i32, addrgp,  L2_loadrubgp>;
   2041   def: Loada_pat<atomic_load_16,  i32, addrgp,  L2_loadruhgp>;
   2042   def: Loada_pat<atomic_load_32,  i32, addrgp,  L2_loadrigp>;
   2043   def: Loada_pat<atomic_load_64,  i64, addrgp,  L2_loadrdgp>;
   2044 }
   2045 
   2046 let AddedComplexity  = 70 in {
   2047   def: Loadam_pat<extloadi8,      i64, addrgp,  ToZext64, L2_loadrubgp>;
   2048   def: Loadam_pat<sextloadi8,     i64, addrgp,  ToSext64, L2_loadrbgp>;
   2049   def: Loadam_pat<zextloadi8,     i64, addrgp,  ToZext64, L2_loadrubgp>;
   2050   def: Loadam_pat<extloadi16,     i64, addrgp,  ToZext64, L2_loadruhgp>;
   2051   def: Loadam_pat<sextloadi16,    i64, addrgp,  ToSext64, L2_loadrhgp>;
   2052   def: Loadam_pat<zextloadi16,    i64, addrgp,  ToZext64, L2_loadruhgp>;
   2053   def: Loadam_pat<extloadi32,     i64, addrgp,  ToZext64, L2_loadrigp>;
   2054   def: Loadam_pat<sextloadi32,    i64, addrgp,  ToSext64, L2_loadrigp>;
   2055   def: Loadam_pat<zextloadi32,    i64, addrgp,  ToZext64, L2_loadrigp>;
   2056 
   2057   def: Loadam_pat<load,           i1,  addrgp,  I32toI1,  L2_loadrubgp>;
   2058   def: Loadam_pat<zextloadi1,     i64, addrgp,  ToZext64, L2_loadrubgp>;
   2059 }
   2060 
   2061 
   2062 // Sign-extending loads of i1 need to replicate the lowest bit throughout
   2063 // the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should
   2064 // do the trick.
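        // Worked out: v = 0 gives 0 - 0 = 0x00000000 and v = 1 gives
        // 0 - 1 = 0xFFFFFFFF, i.e. the loaded bit replicated through all 32 bits.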
   2065 let AddedComplexity = 20 in
   2066 def: Pat<(i32 (sextloadi1 I32:$Rs)),
   2067          (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>;
   2068 
   2069 // Patterns for loads of i1:
   2070 def: Pat<(i1 (load AddrFI:$fi)),
   2071          (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
   2072 def: Pat<(i1 (load (add I32:$Rs, anyimm0:$Off))),
   2073          (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>;
   2074 def: Pat<(i1 (load I32:$Rs)),
   2075          (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
   2076 
   2077 
   2078 // --(13) Store ----------------------------------------------------------
   2079 //
   2080 
   2081 class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset, InstHexagon MI>
   2082   : Pat<(Store Value:$Rt, I32:$Rx, Offset:$s4),
   2083         (MI I32:$Rx, imm:$s4, Value:$Rt)>;
   2084 
   2085 def: Storepi_pat<post_truncsti8,  I32, s4_0ImmPred, S2_storerb_pi>;
   2086 def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>;
   2087 def: Storepi_pat<post_store,      I32, s4_2ImmPred, S2_storeri_pi>;
   2088 def: Storepi_pat<post_store,      I64, s4_3ImmPred, S2_storerd_pi>;
   2089 
   2090 // Patterns for generating stores, where the address takes different forms:
   2091 // - frameindex,
   2092 // - frameindex + offset,
   2093 // - base + offset,
   2094 // - simple (base address without offset).
   2095 // These would usually be used together (via Storexi_pat defined below), but
   2096 // in some cases one may want to apply different properties (such as
   2097 // AddedComplexity) to the individual patterns.
   2098 class Storexi_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
   2099   : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>;
   2100 
   2101 multiclass Storexi_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
   2102                               InstHexagon MI> {
   2103   def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
   2104            (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
   2105   def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)),
   2106            (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
   2107 }
   2108 
   2109 multiclass Storexi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
   2110                            InstHexagon MI> {
   2111   def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)),
   2112            (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
   2113   def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)),
   2114            (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
   2115 }
   2116 
   2117 class Storexi_base_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
   2118   : Pat<(Store Value:$Rt, I32:$Rs),
   2119         (MI IntRegs:$Rs, 0, Value:$Rt)>;
   2120 
   2121 // Patterns for generating stores, where the address takes different forms,
   2122 // and where the value being stored is transformed through the value modifier
   2123 // ValueMod.  The address forms are same as above.
   2124 class Storexim_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
   2125                       InstHexagon MI>
   2126   : Pat<(Store Value:$Rs, AddrFI:$fi),
   2127         (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>;
   2128 
   2129 multiclass Storexim_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
   2130                                PatFrag ValueMod, InstHexagon MI> {
   2131   def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
   2132            (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
   2133   def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)),
   2134            (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
   2135 }
   2136 
   2137 multiclass Storexim_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
   2138                             PatFrag ValueMod, InstHexagon MI> {
   2139   def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)),
   2140            (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
   2141   def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)),
   2142            (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
   2143 }
   2144 
   2145 class Storexim_base_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
   2146                         InstHexagon MI>
   2147   : Pat<(Store Value:$Rt, I32:$Rs),
   2148         (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>;
   2149 
   2150 multiclass Storexi_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
   2151                        InstHexagon MI> {
   2152   defm: Storexi_fi_add_pat <Store, Value, ImmPred, MI>;
   2153   def:  Storexi_fi_pat     <Store, Value,          MI>;
   2154   defm: Storexi_add_pat    <Store, Value, ImmPred, MI>;
   2155 }
   2156 
   2157 multiclass Storexim_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
   2158                         PatFrag ValueMod, InstHexagon MI> {
   2159   defm: Storexim_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>;
   2160   def:  Storexim_fi_pat     <Store, Value,          ValueMod, MI>;
   2161   defm: Storexim_add_pat    <Store, Value, ImmPred, ValueMod, MI>;
   2162 }
   2163 
   2164 // Reg<<S + Imm
   2165 class Storexu_shl_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, InstHexagon MI>
   2166   : Pat<(Store Value:$Rt, (add (shl I32:$Ru, u2_0ImmPred:$u2), ImmPred:$A)),
   2167         (MI IntRegs:$Ru, imm:$u2, ImmPred:$A, Value:$Rt)>;
   2168 
   2169 // Reg<<S + Reg
   2170 class Storexr_shl_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
   2171   : Pat<(Store Value:$Ru, (add I32:$Rs, (shl I32:$Rt, u2_0ImmPred:$u2))),
   2172         (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>;
   2173 
   2174 // Reg + Reg
   2175 class Storexr_add_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
   2176   : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)),
   2177         (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>;
   2178 
   2179 class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI>
   2180   : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>;
   2181 
   2182 class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod,
   2183                   InstHexagon MI>
   2184   : Pat<(Store Value:$val, Addr:$addr),
   2185         (MI Addr:$addr, (ValueMod Value:$val))>;
   2186 
   2187 // Regular stores in the DAG have two operands: value and address.
   2188 // Atomic stores also have two, but they are reversed: address, value.
   2189 // To use atomic stores with the patterns, they need to have their operands
   2190 // swapped. This relies on the knowledge that the fragments in F.Fragments
   2191 // use the operand names "ptr" and "val".
   2192 class AtomSt<PatFrag F>
   2193   : PatFrag<(ops node:$val, node:$ptr), !head(F.Fragments), F.PredicateCode,
   2194             F.OperandTransform> {
   2195   let IsAtomic = F.IsAtomic;
   2196   let MemoryVT = F.MemoryVT;
   2197 }
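        // For example, AtomSt<atomic_store_32> yields a fragment whose operands
        // are (val, ptr) instead of (ptr, val), so it can be dropped into the
        // regular store pattern classes (Storea_pat, Storexi_pat, ...) exactly
        // like a non-atomic store; the GP-relative and absolute sections below
        // rely on this.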
   2198 
   2199 
   2200 def IMM_BYTE : SDNodeXForm<imm, [{
   2201   // -1 can be represented as 255, etc.
   2202   // assigning to a byte restores our desired signed value.
   2203   int8_t imm = N->getSExtValue();
   2204   return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
   2205 }]>;
   2206 
   2207 def IMM_HALF : SDNodeXForm<imm, [{
   2208   // -1 can be represented as 65535, etc.
   2209   // assigning to a short restores our desired signed value.
   2210   int16_t imm = N->getSExtValue();
   2211   return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
   2212 }]>;
   2213 
   2214 def IMM_WORD : SDNodeXForm<imm, [{
   2215   // -1 can be represented as 4294967295, etc.
   2216   // Currently this does not happen, but some optimization might
   2217   // convert -1 into a large positive number.
   2218   // Assigning to a word restores our desired signed value.
   2219   int32_t imm = N->getSExtValue();
   2220   return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
   2221 }]>;
   2222 
   2223 def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>;
   2224 def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>;
   2225 def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>;
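        // A worked example of the value modifiers above: a truncating byte store
        // of the constant 255 stores the same byte as storing -1, and
        // (int8_t)255 == -1, so ToImmByte re-expresses the constant in the signed
        // form that the store-immediate instructions can encode.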
   2226 
   2227 // Even though the offset is not extendable in the store-immediate, we
   2228 // can still generate the fi# in the base address. If the final offset
   2229 // is not valid for the instruction, we will replace it with a scratch
   2230 // register.
   2231 class SmallStackStore<PatFrag Store>
   2232   : PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{
   2233   return isSmallStackStore(cast<StoreSDNode>(N));
   2234 }]>;
   2235 
   2236 // This is the complement of SmallStackStore.
   2237 class LargeStackStore<PatFrag Store>
   2238   : PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{
   2239   return !isSmallStackStore(cast<StoreSDNode>(N));
   2240 }]>;
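        // Illustrative effect of the small/large split (a sketch): a byte store
        // of a constant to a small frame can remain a store-immediate, roughly
        //   memb(fi#2+#1) = #-1
        // while the same store to a large frame is selected as S2_storerb_io with
        // the constant first moved into a register (ToI32), so the extendable
        // register-form offset can absorb the final frame offset.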
   2241 
   2242 // Preferred addressing modes for various combinations of stored value
   2243 // and address computation.
   2244 // For stores where the address and value are both immediates, prefer
   2245 // store-immediate. The reason is that the constant-extender optimization
   2246 // can replace a store-immediate with a store-register, but nothing will
   2247 // turn a store-register back into a store-immediate.
   2248 //
   2249 //         C     R     F    F+C   R+C   R+R   R<<S+C   R<<S+R
   2250 // --+-------+-----+-----+------+-----+-----+--------+--------
   2251 // C |   imm | imm | imm |  imm | imm |  rr |     ur |     rr
   2252 // R |  abs* |  io |  io |   io |  io |  rr |     ur |     rr
   2253 //
   2254 // (*) Absolute or GP-relative.
   2255 //
   2256 // Note that any expression can be matched by Reg. In particular, an immediate
   2257 // can always be placed in a register, so patterns checking for Imm should
   2258 // have a higher priority than the ones involving Reg that could also match.
   2259 // For example, *(p+4) could become r1=#4; memw(r0+r1<<#0) instead of the
   2260 // preferred memw(r0+#4). Similarly Reg+Imm or Reg+Reg should be tried before
   2261 // Reg alone.
   2262 //
   2263 // The order in which the different combinations are tried:
   2264 //
   2265 //         C     F     R    F+C   R+C   R+R   R<<S+C   R<<S+R
   2266 // --+-------+-----+-----+------+-----+-----+--------+--------
   2267 // C |     1 |   6 |   - |    5 |   9 |   - |      - |      -
   2268 // R |     2 |   8 |  12 |    7 |  10 |  11 |      3 |      4
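        // An illustrative instance of the first row: for "*(int*)0x1000 = 3" both
        // the address and the stored value are constants (row C, column C), so the
        // preferred selection is a store-immediate, roughly
        //   r0 = #4096; memw(r0+#0) = #3
        // rather than first materializing #3 into a register.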
   2269 
   2270 
   2271 // First, match the unusual case of a doubleword store into Reg+Imm, where the
   2272 // offset Imm is a multiple of 4 but not of 8. For a naturally aligned store,
   2273 // this implies that Reg is also a proper multiple of 4. To still generate a
   2274 // doubleword store, add 4 to Reg and subtract 4 from the offset.
   2275 
   2276 def s30_2ProperPred  : PatLeaf<(i32 imm), [{
   2277   int64_t v = (int64_t)N->getSExtValue();
   2278   return isShiftedInt<30,2>(v) && !isShiftedInt<29,3>(v);
   2279 }]>;
   2280 def RoundTo8 : SDNodeXForm<imm, [{
   2281   int32_t Imm = N->getSExtValue();
   2282   return CurDAG->getTargetConstant(Imm & -8, SDLoc(N), MVT::i32);
   2283 }]>;
   2284 
   2285 let AddedComplexity = 150 in
   2286 def: Pat<(store I64:$Ru, (add I32:$Rs, s30_2ProperPred:$Off)),
   2287          (S2_storerd_io (A2_addi I32:$Rs, 4), (RoundTo8 $Off), I64:$Ru)>;
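        // For example, a doubleword store to Rs+#12 (a multiple of 4 but not of 8)
        // is selected as roughly
        //   r1 = add(Rs,#4); memd(r1+#8) = Rtt
        // since RoundTo8 turns #12 into #8 and the A2_addi folds the remaining 4
        // into the base register.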
   2288 
   2289 class Storexi_abs_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
   2290   : Pat<(Store Value:$val, anyimm:$addr),
   2291         (MI (ToI32 $addr), 0, Value:$val)>;
   2292 class Storexim_abs_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
   2293                        InstHexagon MI>
   2294   : Pat<(Store Value:$val, anyimm:$addr),
   2295         (MI (ToI32 $addr), 0, (ValueMod Value:$val))>;
   2296 
   2297 let AddedComplexity = 140 in {
   2298   def: Storexim_abs_pat<truncstorei8,  anyint, ToImmByte, S4_storeirb_io>;
   2299   def: Storexim_abs_pat<truncstorei16, anyint, ToImmHalf, S4_storeirh_io>;
   2300   def: Storexim_abs_pat<store,         anyint, ToImmWord, S4_storeiri_io>;
   2301 
   2302   def: Storexi_abs_pat<truncstorei8,  anyimm, S4_storeirb_io>;
   2303   def: Storexi_abs_pat<truncstorei16, anyimm, S4_storeirh_io>;
   2304   def: Storexi_abs_pat<store,         anyimm, S4_storeiri_io>;
   2305 }
   2306 
   2307 // GP-relative address
   2308 let AddedComplexity = 120 in {
   2309   def: Storea_pat<truncstorei8,             I32, addrgp, S2_storerbgp>;
   2310   def: Storea_pat<truncstorei16,            I32, addrgp, S2_storerhgp>;
   2311   def: Storea_pat<store,                    I32, addrgp, S2_storerigp>;
   2312   def: Storea_pat<store,                    I64, addrgp, S2_storerdgp>;
   2313   def: Storea_pat<store,                    F32, addrgp, S2_storerigp>;
   2314   def: Storea_pat<store,                    F64, addrgp, S2_storerdgp>;
   2315   def: Storea_pat<AtomSt<atomic_store_8>,   I32, addrgp, S2_storerbgp>;
   2316   def: Storea_pat<AtomSt<atomic_store_16>,  I32, addrgp, S2_storerhgp>;
   2317   def: Storea_pat<AtomSt<atomic_store_32>,  I32, addrgp, S2_storerigp>;
   2318   def: Storea_pat<AtomSt<atomic_store_64>,  I64, addrgp, S2_storerdgp>;
   2319 
   2320   def: Stoream_pat<truncstorei8,  I64, addrgp, LoReg,    S2_storerbgp>;
   2321   def: Stoream_pat<truncstorei16, I64, addrgp, LoReg,    S2_storerhgp>;
   2322   def: Stoream_pat<truncstorei32, I64, addrgp, LoReg,    S2_storerigp>;
   2323   def: Stoream_pat<store,         I1,  addrgp, I1toI32,  S2_storerbgp>;
   2324 }
   2325 
   2326 // Absolute address
   2327 let AddedComplexity = 110 in {
   2328   def: Storea_pat<truncstorei8,             I32, anyimm0, PS_storerbabs>;
   2329   def: Storea_pat<truncstorei16,            I32, anyimm1, PS_storerhabs>;
   2330   def: Storea_pat<store,                    I32, anyimm2, PS_storeriabs>;
   2331   def: Storea_pat<store,                    I64, anyimm3, PS_storerdabs>;
   2332   def: Storea_pat<store,                    F32, anyimm2, PS_storeriabs>;
   2333   def: Storea_pat<store,                    F64, anyimm3, PS_storerdabs>;
   2334   def: Storea_pat<AtomSt<atomic_store_8>,   I32, anyimm0, PS_storerbabs>;
   2335   def: Storea_pat<AtomSt<atomic_store_16>,  I32, anyimm1, PS_storerhabs>;
   2336   def: Storea_pat<AtomSt<atomic_store_32>,  I32, anyimm2, PS_storeriabs>;
   2337   def: Storea_pat<AtomSt<atomic_store_64>,  I64, anyimm3, PS_storerdabs>;
   2338 
   2339   def: Stoream_pat<truncstorei8,  I64, anyimm0, LoReg,    PS_storerbabs>;
   2340   def: Stoream_pat<truncstorei16, I64, anyimm1, LoReg,    PS_storerhabs>;
   2341   def: Stoream_pat<truncstorei32, I64, anyimm2, LoReg,    PS_storeriabs>;
   2342   def: Stoream_pat<store,         I1,  anyimm0, I1toI32,  PS_storerbabs>;
   2343 }
   2344 
   2345 // Reg<<S + Imm
   2346 let AddedComplexity = 100 in {
   2347   def: Storexu_shl_pat<truncstorei8,  I32, anyimm0, S4_storerb_ur>;
   2348   def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>;
   2349   def: Storexu_shl_pat<store,         I32, anyimm2, S4_storeri_ur>;
   2350   def: Storexu_shl_pat<store,         I64, anyimm3, S4_storerd_ur>;
   2351   def: Storexu_shl_pat<store,         F32, anyimm2, S4_storeri_ur>;
   2352   def: Storexu_shl_pat<store,         F64, anyimm3, S4_storerd_ur>;
   2353 
   2354   def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), anyimm:$A)),
   2355            (S4_storerb_ur IntRegs:$Rs, imm:$u2, imm:$A, (I1toI32 I1:$Pu))>;
   2356 }
   2357 
   2358 // Reg<<S + Reg
   2359 let AddedComplexity = 90 in {
   2360   def: Storexr_shl_pat<truncstorei8,  I32, S4_storerb_rr>;
   2361   def: Storexr_shl_pat<truncstorei16, I32, S4_storerh_rr>;
   2362   def: Storexr_shl_pat<store,         I32, S4_storeri_rr>;
   2363   def: Storexr_shl_pat<store,         I64, S4_storerd_rr>;
   2364   def: Storexr_shl_pat<store,         F32, S4_storeri_rr>;
   2365   def: Storexr_shl_pat<store,         F64, S4_storerd_rr>;
   2366 
   2367   def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), I32:$Rt)),
   2368            (S4_storerb_rr IntRegs:$Rt, IntRegs:$Rs, imm:$u2, (I1toI32 I1:$Pu))>;
   2369 }
   2370 
   2371 class SS_<PatFrag F> : SmallStackStore<F>;
   2372 class LS_<PatFrag F> : LargeStackStore<F>;
   2373 
   2374 multiclass IMFA_<PatFrag S, PatFrag V, PatFrag O, PatFrag M, InstHexagon I> {
   2375   defm: Storexim_fi_add_pat<S, V, O, M, I>;
   2376 }
   2377 multiclass IFA_<PatFrag S, PatFrag V, PatFrag O, InstHexagon I> {
   2378   defm: Storexi_fi_add_pat<S, V, O, I>;
   2379 }
   2380 
   2381 // Fi+Imm, store-immediate
   2382 let AddedComplexity = 80 in {
   2383   defm: IMFA_<SS_<truncstorei8>,  anyint, u6_0ImmPred, ToImmByte, S4_storeirb_io>;
   2384   defm: IMFA_<SS_<truncstorei16>, anyint, u6_1ImmPred, ToImmHalf, S4_storeirh_io>;
   2385   defm: IMFA_<SS_<store>,         anyint, u6_2ImmPred, ToImmWord, S4_storeiri_io>;
   2386 
   2387   defm: IFA_<SS_<truncstorei8>,   anyimm, u6_0ImmPred, S4_storeirb_io>;
   2388   defm: IFA_<SS_<truncstorei16>,  anyimm, u6_1ImmPred, S4_storeirh_io>;
   2389   defm: IFA_<SS_<store>,          anyimm, u6_2ImmPred, S4_storeiri_io>;
   2390 
   2391   // For large-stack stores, generate store-register (prefer explicit Fi
   2392   // in the address).
   2393   defm: IMFA_<LS_<truncstorei8>,   anyimm, u6_0ImmPred, ToI32, S2_storerb_io>;
   2394   defm: IMFA_<LS_<truncstorei16>,  anyimm, u6_1ImmPred, ToI32, S2_storerh_io>;
   2395   defm: IMFA_<LS_<store>,          anyimm, u6_2ImmPred, ToI32, S2_storeri_io>;
   2396 }
   2397 
   2398 // Fi, store-immediate
   2399 let AddedComplexity = 70 in {
   2400   def: Storexim_fi_pat<SS_<truncstorei8>,  anyint, ToImmByte, S4_storeirb_io>;
   2401   def: Storexim_fi_pat<SS_<truncstorei16>, anyint, ToImmHalf, S4_storeirh_io>;
   2402   def: Storexim_fi_pat<SS_<store>,         anyint, ToImmWord, S4_storeiri_io>;
   2403 
   2404   def: Storexi_fi_pat<SS_<truncstorei8>,   anyimm, S4_storeirb_io>;
   2405   def: Storexi_fi_pat<SS_<truncstorei16>,  anyimm, S4_storeirh_io>;
   2406   def: Storexi_fi_pat<SS_<store>,          anyimm, S4_storeiri_io>;
   2407 
   2408   // For large-stack stores, generate store-register (prefer explicit Fi
   2409   // in the address).
   2410   def: Storexim_fi_pat<LS_<truncstorei8>,  anyimm, ToI32, S2_storerb_io>;
   2411   def: Storexim_fi_pat<LS_<truncstorei16>, anyimm, ToI32, S2_storerh_io>;
   2412   def: Storexim_fi_pat<LS_<store>,         anyimm, ToI32, S2_storeri_io>;
   2413 }
   2414 
   2415 // Fi+Imm, Fi, store-register
   2416 let AddedComplexity = 60 in {
   2417   defm: Storexi_fi_add_pat<truncstorei8,  I32, anyimm, S2_storerb_io>;
   2418   defm: Storexi_fi_add_pat<truncstorei16, I32, anyimm, S2_storerh_io>;
   2419   defm: Storexi_fi_add_pat<store,         I32, anyimm, S2_storeri_io>;
   2420   defm: Storexi_fi_add_pat<store,         I64, anyimm, S2_storerd_io>;
   2421   defm: Storexi_fi_add_pat<store,         F32, anyimm, S2_storeri_io>;
   2422   defm: Storexi_fi_add_pat<store,         F64, anyimm, S2_storerd_io>;
   2423   defm: Storexim_fi_add_pat<store, I1, anyimm, I1toI32, S2_storerb_io>;
   2424 
   2425   def: Storexi_fi_pat<truncstorei8,   I32, S2_storerb_io>;
   2426   def: Storexi_fi_pat<truncstorei16,  I32, S2_storerh_io>;
   2427   def: Storexi_fi_pat<store,          I32, S2_storeri_io>;
   2428   def: Storexi_fi_pat<store,          I64, S2_storerd_io>;
   2429   def: Storexi_fi_pat<store,          F32, S2_storeri_io>;
   2430   def: Storexi_fi_pat<store,          F64, S2_storerd_io>;
   2431   def: Storexim_fi_pat<store, I1, I1toI32, S2_storerb_io>;
   2432 }
   2433 
   2434 
   2435 multiclass IMRA_<PatFrag S, PatFrag V, PatFrag O, PatFrag M, InstHexagon I> {
   2436   defm: Storexim_add_pat<S, V, O, M, I>;
   2437 }
   2438 multiclass IRA_<PatFrag S, PatFrag V, PatFrag O, InstHexagon I> {
   2439   defm: Storexi_add_pat<S, V, O, I>;
   2440 }
   2441 
   2442 // Reg+Imm, store-immediate
   2443 let AddedComplexity = 50 in {
   2444   defm: IMRA_<truncstorei8,   anyint, u6_0ImmPred, ToImmByte, S4_storeirb_io>;
   2445   defm: IMRA_<truncstorei16,  anyint, u6_1ImmPred, ToImmHalf, S4_storeirh_io>;
   2446   defm: IMRA_<store,          anyint, u6_2ImmPred, ToImmWord, S4_storeiri_io>;
   2447 
   2448   defm: IRA_<truncstorei8,    anyimm, u6_0ImmPred, S4_storeirb_io>;
   2449   defm: IRA_<truncstorei16,   anyimm, u6_1ImmPred, S4_storeirh_io>;
   2450   defm: IRA_<store,           anyimm, u6_2ImmPred, S4_storeiri_io>;
   2451 }
   2452 
   2453 // Reg+Imm, store-register
   2454 let AddedComplexity = 40 in {
   2455   defm: Storexi_pat<truncstorei8,   I32, anyimm0, S2_storerb_io>;
   2456   defm: Storexi_pat<truncstorei16,  I32, anyimm1, S2_storerh_io>;
   2457   defm: Storexi_pat<store,          I32, anyimm2, S2_storeri_io>;
   2458   defm: Storexi_pat<store,          I64, anyimm3, S2_storerd_io>;
   2459   defm: Storexi_pat<store,          F32, anyimm2, S2_storeri_io>;
   2460   defm: Storexi_pat<store,          F64, anyimm3, S2_storerd_io>;
   2461 
   2462   defm: Storexim_pat<truncstorei8,  I64, anyimm0, LoReg,   S2_storerb_io>;
   2463   defm: Storexim_pat<truncstorei16, I64, anyimm1, LoReg,   S2_storerh_io>;
   2464   defm: Storexim_pat<truncstorei32, I64, anyimm2, LoReg,   S2_storeri_io>;
   2465   defm: Storexim_pat<store,         I1,  anyimm0, I1toI32, S2_storerb_io>;
   2466 
   2467   defm: Storexi_pat<AtomSt<atomic_store_8>,  I32, anyimm0, S2_storerb_io>;
   2468   defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>;
   2469   defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>;
   2470   defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>;
   2471 }
   2472 
   2473 // Reg+Reg
   2474 let AddedComplexity = 30 in {
   2475   def: Storexr_add_pat<truncstorei8,  I32, S4_storerb_rr>;
   2476   def: Storexr_add_pat<truncstorei16, I32, S4_storerh_rr>;
   2477   def: Storexr_add_pat<store,         I32, S4_storeri_rr>;
   2478   def: Storexr_add_pat<store,         I64, S4_storerd_rr>;
   2479   def: Storexr_add_pat<store,         F32, S4_storeri_rr>;
   2480   def: Storexr_add_pat<store,         F64, S4_storerd_rr>;
   2481 
   2482   def: Pat<(store I1:$Pu, (add I32:$Rs, I32:$Rt)),
   2483            (S4_storerb_rr IntRegs:$Rs, IntRegs:$Rt, 0, (I1toI32 I1:$Pu))>;
   2484 }
   2485 
   2486 // Reg, store-immediate
   2487 let AddedComplexity = 20 in {
   2488   def: Storexim_base_pat<truncstorei8,  anyint, ToImmByte, S4_storeirb_io>;
   2489   def: Storexim_base_pat<truncstorei16, anyint, ToImmHalf, S4_storeirh_io>;
   2490   def: Storexim_base_pat<store,         anyint, ToImmWord, S4_storeiri_io>;
   2491 
   2492   def: Storexi_base_pat<truncstorei8,   anyimm, S4_storeirb_io>;
   2493   def: Storexi_base_pat<truncstorei16,  anyimm, S4_storeirh_io>;
   2494   def: Storexi_base_pat<store,          anyimm, S4_storeiri_io>;
   2495 }
   2496 
   2497 // Reg, store-register
   2498 let AddedComplexity = 10 in {
   2499   def: Storexi_base_pat<truncstorei8,   I32, S2_storerb_io>;
   2500   def: Storexi_base_pat<truncstorei16,  I32, S2_storerh_io>;
   2501   def: Storexi_base_pat<store,          I32, S2_storeri_io>;
   2502   def: Storexi_base_pat<store,          I64, S2_storerd_io>;
   2503   def: Storexi_base_pat<store,          F32, S2_storeri_io>;
   2504   def: Storexi_base_pat<store,          F64, S2_storerd_io>;
   2505 
   2506   def: Storexim_base_pat<truncstorei8,  I64, LoReg,   S2_storerb_io>;
   2507   def: Storexim_base_pat<truncstorei16, I64, LoReg,   S2_storerh_io>;
   2508   def: Storexim_base_pat<truncstorei32, I64, LoReg,   S2_storeri_io>;
   2509   def: Storexim_base_pat<store,         I1,  I1toI32, S2_storerb_io>;
   2510 
   2511   def: Storexi_base_pat<AtomSt<atomic_store_8>,   I32, S2_storerb_io>;
   2512   def: Storexi_base_pat<AtomSt<atomic_store_16>,  I32, S2_storerh_io>;
   2513   def: Storexi_base_pat<AtomSt<atomic_store_32>,  I32, S2_storeri_io>;
   2514   def: Storexi_base_pat<AtomSt<atomic_store_64>,  I64, S2_storerd_io>;
   2515 }
   2516 
   2517 
   2518 // --(14) Memop ----------------------------------------------------------
   2519 //
   2520 
   2521 def m5_0Imm8Pred : PatLeaf<(i32 imm), [{
   2522   int8_t V = N->getSExtValue();
   2523   return -32 < V && V <= -1;
   2524 }]>;
   2525 
   2526 def m5_0Imm16Pred : PatLeaf<(i32 imm), [{
   2527   int16_t V = N->getSExtValue();
   2528   return -32 < V && V <= -1;
   2529 }]>;
   2530 
   2531 def m5_0ImmPred  : PatLeaf<(i32 imm), [{
   2532   int64_t V = N->getSExtValue();
   2533   return -31 <= V && V <= -1;
   2534 }]>;
   2535 
   2536 def IsNPow2_8 : PatLeaf<(i32 imm), [{
   2537   uint8_t NV = ~N->getZExtValue();
   2538   return isPowerOf2_32(NV);
   2539 }]>;
   2540 
   2541 def IsNPow2_16 : PatLeaf<(i32 imm), [{
   2542   uint16_t NV = ~N->getZExtValue();
   2543   return isPowerOf2_32(NV);
   2544 }]>;
   2545 
   2546 def Log2_8 : SDNodeXForm<imm, [{
   2547   uint8_t V = N->getZExtValue();
   2548   return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
   2549 }]>;
   2550 
   2551 def Log2_16 : SDNodeXForm<imm, [{
   2552   uint16_t V = N->getZExtValue();
   2553   return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
   2554 }]>;
   2555 
   2556 def LogN2_8 : SDNodeXForm<imm, [{
   2557   uint8_t NV = ~N->getZExtValue();
   2558   return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32);
   2559 }]>;
   2560 
   2561 def LogN2_16 : SDNodeXForm<imm, [{
   2562   uint16_t NV = ~N->getZExtValue();
   2563   return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32);
   2564 }]>;
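        // A worked example for the clrbit forms below: clearing bit 4 of a byte is
        // "x &= 0xef"; IsNPow2_8 accepts 0xef because ~0xef is 0x10 within 8 bits
        // (a power of 2), and LogN2_8 converts it into the bit number #4 used by
        // the clrbit memop.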
   2565 
   2566 def IdImm : SDNodeXForm<imm, [{ return SDValue(N, 0); }]>;
   2567 
   2568 multiclass Memopxr_base_pat<PatFrag Load, PatFrag Store, SDNode Oper,
   2569                             InstHexagon MI> {
   2570   // Addr: i32
   2571   def: Pat<(Store (Oper (Load I32:$Rs), I32:$A), I32:$Rs),
   2572            (MI I32:$Rs, 0, I32:$A)>;
   2573   // Addr: fi
   2574   def: Pat<(Store (Oper (Load AddrFI:$Rs), I32:$A), AddrFI:$Rs),
   2575            (MI AddrFI:$Rs, 0, I32:$A)>;
   2576 }
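        // An illustrative match for the multiclass above (assuming p points to a
        // 32-bit int): "*p += r1" loads, adds, and stores the same address, so with
        // Load=load, Store=store, Oper=add it selects the memop
        //   memw(r0+#0) += r1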
   2577 
   2578 multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
   2579                            SDNode Oper, InstHexagon MI> {
   2580   // Addr: i32
   2581   def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), I32:$A),
   2582                   (add I32:$Rs, ImmPred:$Off)),
   2583            (MI I32:$Rs, imm:$Off, I32:$A)>;
   2584   def: Pat<(Store (Oper (Load (IsOrAdd I32:$Rs, ImmPred:$Off)), I32:$A),
   2585                   (IsOrAdd I32:$Rs, ImmPred:$Off)),
   2586            (MI I32:$Rs, imm:$Off, I32:$A)>;
   2587   // Addr: fi
   2588   def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), I32:$A),
   2589                   (add AddrFI:$Rs, ImmPred:$Off)),
   2590            (MI AddrFI:$Rs, imm:$Off, I32:$A)>;
   2591   def: Pat<(Store (Oper (Load (IsOrAdd AddrFI:$Rs, ImmPred:$Off)), I32:$A),
   2592                   (IsOrAdd AddrFI:$Rs, ImmPred:$Off)),
   2593            (MI AddrFI:$Rs, imm:$Off, I32:$A)>;
   2594 }
   2595 
   2596 multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
   2597                        SDNode Oper, InstHexagon MI> {
   2598   let Predicates = [UseMEMOPS] in {
   2599     defm: Memopxr_base_pat <Load, Store,          Oper, MI>;
   2600     defm: Memopxr_add_pat  <Load, Store, ImmPred, Oper, MI>;
   2601   }
   2602 }
   2603 
   2604 let AddedComplexity = 200 in {
   2605   // add reg
   2606   defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add,
   2607         /*anyext*/  L4_add_memopb_io>;
   2608   defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, add,
   2609         /*sext*/    L4_add_memopb_io>;
   2610   defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, add,
   2611         /*zext*/    L4_add_memopb_io>;
   2612   defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, add,
   2613         /*anyext*/  L4_add_memoph_io>;
   2614   defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, add,
   2615         /*sext*/    L4_add_memoph_io>;
   2616   defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, add,
   2617         /*zext*/    L4_add_memoph_io>;
   2618   defm: Memopxr_pat<load, store, u6_2ImmPred, add, L4_add_memopw_io>;
   2619 
   2620   // sub reg
   2621   defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, sub,
   2622         /*anyext*/  L4_sub_memopb_io>;
   2623   defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub,
   2624         /*sext*/    L4_sub_memopb_io>;
   2625   defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub,
   2626         /*zext*/    L4_sub_memopb_io>;
   2627   defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, sub,
   2628         /*anyext*/  L4_sub_memoph_io>;
   2629   defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub,
   2630         /*sext*/    L4_sub_memoph_io>;
   2631   defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub,
   2632         /*zext*/    L4_sub_memoph_io>;
   2633   defm: Memopxr_pat<load, store, u6_2ImmPred, sub, L4_sub_memopw_io>;
   2634 
   2635   // and reg
   2636   defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, and,
   2637         /*anyext*/  L4_and_memopb_io>;
   2638   defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, and,
   2639         /*sext*/    L4_and_memopb_io>;
   2640   defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, and,
   2641         /*zext*/    L4_and_memopb_io>;
   2642   defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, and,
   2643         /*anyext*/  L4_and_memoph_io>;
   2644   defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, and,
   2645         /*sext*/    L4_and_memoph_io>;
   2646   defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, and,
   2647         /*zext*/    L4_and_memoph_io>;
   2648   defm: Memopxr_pat<load, store, u6_2ImmPred, and, L4_and_memopw_io>;
   2649 
   2650   // or reg
   2651   defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, or,
   2652         /*anyext*/  L4_or_memopb_io>;
   2653   defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, or,
   2654         /*sext*/    L4_or_memopb_io>;
   2655   defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, or,
   2656         /*zext*/    L4_or_memopb_io>;
   2657   defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, or,
   2658         /*anyext*/  L4_or_memoph_io>;
   2659   defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, or,
   2660         /*sext*/    L4_or_memoph_io>;
   2661   defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, or,
   2662         /*zext*/    L4_or_memoph_io>;
   2663   defm: Memopxr_pat<load, store, u6_2ImmPred, or, L4_or_memopw_io>;
   2664 }
   2665 
   2666 
   2667 multiclass Memopxi_base_pat<PatFrag Load, PatFrag Store, SDNode Oper,
   2668                             PatFrag Arg, SDNodeXForm ArgMod, InstHexagon MI> {
   2669   // Addr: i32
   2670   def: Pat<(Store (Oper (Load I32:$Rs), Arg:$A), I32:$Rs),
   2671            (MI I32:$Rs, 0, (ArgMod Arg:$A))>;
   2672   // Addr: fi
   2673   def: Pat<(Store (Oper (Load AddrFI:$Rs), Arg:$A), AddrFI:$Rs),
   2674            (MI AddrFI:$Rs, 0, (ArgMod Arg:$A))>;
   2675 }
   2676 
   2677 multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
   2678                            SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
   2679                            InstHexagon MI> {
   2680   // Addr: i32
   2681   def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), Arg:$A),
   2682                   (add I32:$Rs, ImmPred:$Off)),
   2683            (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>;
   2684   def: Pat<(Store (Oper (Load (IsOrAdd I32:$Rs, ImmPred:$Off)), Arg:$A),
   2685                   (IsOrAdd I32:$Rs, ImmPred:$Off)),
   2686            (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>;
   2687   // Addr: fi
   2688   def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), Arg:$A),
   2689                   (add AddrFI:$Rs, ImmPred:$Off)),
   2690            (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>;
   2691   def: Pat<(Store (Oper (Load (IsOrAdd AddrFI:$Rs, ImmPred:$Off)), Arg:$A),
   2692                   (IsOrAdd AddrFI:$Rs, ImmPred:$Off)),
   2693            (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>;
   2694 }
   2695 
   2696 multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
   2697                        SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
   2698                        InstHexagon MI> {
   2699   let Predicates = [UseMEMOPS] in {
   2700     defm: Memopxi_base_pat <Load, Store,          Oper, Arg, ArgMod, MI>;
   2701     defm: Memopxi_add_pat  <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
   2702   }
   2703 }
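        // The immediate forms below also cover subtraction of a constant: the DAG
        // normally canonicalizes "x - 5" into "x + (-5)", so each group pairs an
        // add with a negative-immediate predicate (m5_0ImmPred and friends) and
        // uses NegImm* to re-negate the constant for the isub memop, and vice
        // versa for a sub of a negative constant mapping onto iadd. For example
        // (illustrative), "*p += -3" selects roughly
        //   memw(r0+#0) -= #3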
   2704 
   2705 let AddedComplexity = 220 in {
   2706   // add imm
   2707   defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
   2708         /*anyext*/  IdImm, L4_iadd_memopb_io>;
   2709   defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
   2710         /*sext*/    IdImm, L4_iadd_memopb_io>;
   2711   defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
   2712         /*zext*/    IdImm, L4_iadd_memopb_io>;
   2713   defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
   2714         /*anyext*/  IdImm, L4_iadd_memoph_io>;
   2715   defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
   2716         /*sext*/    IdImm, L4_iadd_memoph_io>;
   2717   defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
   2718         /*zext*/    IdImm, L4_iadd_memoph_io>;
   2719   defm: Memopxi_pat<load, store, u6_2ImmPred, add, u5_0ImmPred, IdImm,
   2720                     L4_iadd_memopw_io>;
   2721   defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
   2722         /*anyext*/  NegImm8, L4_iadd_memopb_io>;
   2723   defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
   2724         /*sext*/    NegImm8, L4_iadd_memopb_io>;
   2725   defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
   2726         /*zext*/    NegImm8, L4_iadd_memopb_io>;
   2727   defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
   2728         /*anyext*/  NegImm16, L4_iadd_memoph_io>;
   2729   defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
   2730         /*sext*/    NegImm16, L4_iadd_memoph_io>;
   2731   defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
   2732         /*zext*/    NegImm16, L4_iadd_memoph_io>;
   2733   defm: Memopxi_pat<load, store, u6_2ImmPred, sub, m5_0ImmPred, NegImm32,
   2734                     L4_iadd_memopw_io>;
   2735 
   2736   // sub imm
   2737   defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
   2738         /*anyext*/  IdImm, L4_isub_memopb_io>;
   2739   defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
   2740         /*sext*/    IdImm, L4_isub_memopb_io>;
   2741   defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
   2742         /*zext*/    IdImm, L4_isub_memopb_io>;
   2743   defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
   2744         /*anyext*/  IdImm, L4_isub_memoph_io>;
   2745   defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
   2746         /*sext*/    IdImm, L4_isub_memoph_io>;
   2747   defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
   2748         /*zext*/    IdImm, L4_isub_memoph_io>;
   2749   defm: Memopxi_pat<load, store, u6_2ImmPred, sub, u5_0ImmPred, IdImm,
   2750                     L4_isub_memopw_io>;
   2751   defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
   2752         /*anyext*/  NegImm8, L4_isub_memopb_io>;
   2753   defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
   2754         /*sext*/    NegImm8, L4_isub_memopb_io>;
   2755   defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
   2756         /*zext*/    NegImm8, L4_isub_memopb_io>;
   2757   defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
   2758         /*anyext*/  NegImm16, L4_isub_memoph_io>;
   2759   defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
   2760         /*sext*/    NegImm16, L4_isub_memoph_io>;
   2761   defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
   2762         /*zext*/    NegImm16, L4_isub_memoph_io>;
   2763   defm: Memopxi_pat<load, store, u6_2ImmPred, add, m5_0ImmPred, NegImm32,
   2764                     L4_isub_memopw_io>;
   2765 
   2766   // clrbit imm
   2767   defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8,
   2768         /*anyext*/  LogN2_8, L4_iand_memopb_io>;
   2769   defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8,
   2770         /*sext*/    LogN2_8, L4_iand_memopb_io>;
   2771   defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8,
   2772         /*zext*/    LogN2_8, L4_iand_memopb_io>;
   2773   defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16,
   2774         /*anyext*/  LogN2_16, L4_iand_memoph_io>;
   2775   defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16,
   2776         /*sext*/    LogN2_16, L4_iand_memoph_io>;
   2777   defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16,
   2778         /*zext*/    LogN2_16, L4_iand_memoph_io>;
   2779   defm: Memopxi_pat<load, store, u6_2ImmPred, and, IsNPow2_32,
   2780                     LogN2_32, L4_iand_memopw_io>;
   2781 
   2782   // setbit imm
   2783   defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32,
   2784         /*anyext*/  Log2_8, L4_ior_memopb_io>;
   2785   defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32,
   2786         /*sext*/    Log2_8, L4_ior_memopb_io>;
   2787   defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32,
   2788         /*zext*/    Log2_8, L4_ior_memopb_io>;
   2789   defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32,
   2790         /*anyext*/  Log2_16, L4_ior_memoph_io>;
   2791   defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32,
   2792         /*sext*/    Log2_16, L4_ior_memoph_io>;
   2793   defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32,
   2794         /*zext*/    Log2_16, L4_ior_memoph_io>;
   2795   defm: Memopxi_pat<load, store, u6_2ImmPred, or, IsPow2_32,
   2796                     Log2_32, L4_ior_memopw_io>;
   2797 }
   2798 
   2799 
   2800 // --(15) Call -----------------------------------------------------------
   2801 //
   2802 
   2803 // Pseudo instructions.
   2804 def SDT_SPCallSeqStart
   2805   : SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
   2806 def SDT_SPCallSeqEnd
   2807   : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
   2808 
   2809 def callseq_start: SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
   2810                           [SDNPHasChain, SDNPOutGlue]>;
   2811 def callseq_end:   SDNode<"ISD::CALLSEQ_END",   SDT_SPCallSeqEnd,
   2812                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
   2813 
   2814 def SDT_SPCall: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
   2815 
   2816 def HexagonTCRet: SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
   2817                          [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;
   2818 def callv3: SDNode<"HexagonISD::CALL", SDT_SPCall,
   2819                    [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
   2820 def callv3nr: SDNode<"HexagonISD::CALLnr", SDT_SPCall,
   2821                      [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
   2822 
   2823 def: Pat<(callseq_start timm:$amt, timm:$amt2),
   2824          (ADJCALLSTACKDOWN imm:$amt, imm:$amt2)>;
   2825 def: Pat<(callseq_end timm:$amt1, timm:$amt2),
   2826          (ADJCALLSTACKUP imm:$amt1, imm:$amt2)>;
   2827 
   2828 def: Pat<(HexagonTCRet tglobaladdr:$dst),   (PS_tailcall_i tglobaladdr:$dst)>;
   2829 def: Pat<(HexagonTCRet texternalsym:$dst),  (PS_tailcall_i texternalsym:$dst)>;
   2830 def: Pat<(HexagonTCRet I32:$dst),           (PS_tailcall_r I32:$dst)>;
   2831 
   2832 def: Pat<(callv3 I32:$dst),                 (J2_callr I32:$dst)>;
   2833 def: Pat<(callv3 tglobaladdr:$dst),         (J2_call tglobaladdr:$dst)>;
   2834 def: Pat<(callv3 texternalsym:$dst),        (J2_call texternalsym:$dst)>;
   2835 def: Pat<(callv3 tglobaltlsaddr:$dst),      (J2_call tglobaltlsaddr:$dst)>;
   2836 
   2837 def: Pat<(callv3nr I32:$dst),               (PS_callr_nr I32:$dst)>;
   2838 def: Pat<(callv3nr tglobaladdr:$dst),       (PS_call_nr tglobaladdr:$dst)>;
   2839 def: Pat<(callv3nr texternalsym:$dst),      (PS_call_nr texternalsym:$dst)>;
   2840 
   2841 def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
   2842                      [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
   2843 def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
   2844 
   2845 def: Pat<(retflag),   (PS_jmpret (i32 R31))>;
   2846 def: Pat<(eh_return), (EH_RETURN_JMPR (i32 R31))>;
   2847 
   2848 
   2849 // --(16) Branch ---------------------------------------------------------
   2850 //
   2851 
   2852 def: Pat<(br      bb:$dst),         (J2_jump  b30_2Imm:$dst)>;
   2853 def: Pat<(brind   I32:$dst),        (J2_jumpr I32:$dst)>;
   2854 
   2855 def: Pat<(brcond I1:$Pu, bb:$dst),
   2856          (J2_jumpt I1:$Pu, bb:$dst)>;
   2857 def: Pat<(brcond (not I1:$Pu), bb:$dst),
   2858          (J2_jumpf I1:$Pu, bb:$dst)>;
   2859 def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst),
   2860          (J2_jumpf I1:$Pu, bb:$dst)>;
   2861 def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst),
   2862          (J2_jumpf I1:$Pu, bb:$dst)>;
   2863 def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst),
   2864          (J2_jumpt I1:$Pu, bb:$dst)>;
   2865 
   2866 
   2867 // --(17) Misc -----------------------------------------------------------
   2868 
   2869 
   2870 // Generate code of the form 'C2_muxii(cmpbgtui(Rs, C-1),0,1)'
   2871 // for C code of the form r = (c>='0' && c<='9') ? 1 : 0.
   2872 // The isdigit transformation relies on two 'clever' aspects:
   2873 // 1) The data type is unsigned, which allows us to eliminate a zero test
   2874 //    after biasing the expression by 48. We depend on the representation
   2875 //    and semantics of the unsigned type.
   2876 // 2) The front end has converted <= 9 into < 10 on entry to LLVM.
   2877 //
   2878 // For the C code:
   2879 //   retval = (c >= '0' && c <= '9') ? 1 : 0;
   2880 // The code is transformed upstream of LLVM into
   2881 //   retval = (c-48) < 10 ? 1 : 0;
   2882 
   2883 def u7_0PosImmPred : ImmLeaf<i32, [{
   2884   // True if the immediate fits in a 7-bit unsigned field and is positive.
   2885   return Imm > 0 && isUInt<7>(Imm);
   2886 }]>;
   2887 
   2888 let AddedComplexity = 139 in
   2889 def: Pat<(i32 (zext (i1 (setult (and I32:$Rs, 255), u7_0PosImmPred:$u7)))),
   2890          (C2_muxii (A4_cmpbgtui IntRegs:$Rs, (UDEC1 imm:$u7)), 0, 1)>;
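        // A sketch of the generated code: with the biased character value in r0,
        // "(r0 & 255) < 10" becomes roughly
        //   p0 = cmpb.gtu(r0,#9); r1 = mux(p0,#0,#1)
        // i.e. the byte compare against 9 (UDEC1 of 10) replaces the and/setult
        // pair, and the mux materializes the zero-extended i1 result.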
   2891 
   2892 let AddedComplexity = 100 in
   2893 def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))),
   2894                                      (i32 (extloadi8  (add I32:$b, 3))),
   2895                                      24, 8),
   2896                       (i32 16)),
   2897                  (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
   2898              (zextloadi8 I32:$b)),
   2899          (A2_swiz (L2_loadri_io I32:$b, 0))>;
   2900 
   2901 
   2902 // We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH
   2903 // because the SDNode ISD::PREFETCH has properties MayLoad and MayStore.
   2904 // We don't really want either one here.
   2905 def SDTHexagonDCFETCH: SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>;
   2906 def HexagonDCFETCH: SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH,
   2907                            [SDNPHasChain]>;
   2908 
   2909 def: Pat<(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3),
   2910          (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>;
   2911 def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)), (i32 0)),
   2912          (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>;
   2913 
   2914 def SDTHexagonALLOCA
   2915   : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
   2916 def HexagonALLOCA
   2917   : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA, [SDNPHasChain]>;
   2918 
   2919 def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)),
   2920          (PS_alloca IntRegs:$Rs, imm:$A)>;
   2921 
   2922 def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
   2923 def: Pat<(HexagonBARRIER), (Y2_barrier)>;
   2924 
   2925 // Read cycle counter.
   2926 def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
   2927 def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf,
   2928   [SDNPHasChain]>;
   2929 
   2930 def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>;
   2931 
   2932 // The declared return value of the store-locked intrinsics is i32, but
   2933 // the instructions actually define i1. To avoid register copies from
   2934 // IntRegs to PredRegs and back, fold the entire pattern that checks the
   2935 // result against true/false.
   2936 let AddedComplexity = 100 in {
   2937   def: Pat<(i1 (setne (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)),
   2938            (S2_storew_locked I32:$Rs, I32:$Rt)>;
   2939   def: Pat<(i1 (seteq (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)),
   2940            (C2_not (S2_storew_locked I32:$Rs, I32:$Rt))>;
   2941   def: Pat<(i1 (setne (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)),
   2942            (S4_stored_locked I32:$Rs, I64:$Rt)>;
   2943   def: Pat<(i1 (seteq (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)),
   2944            (C2_not (S4_stored_locked I32:$Rs, I64:$Rt))>;
   2945 }
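        // A usage sketch (assuming the usual builtin naming): for C code like
        //   if (__builtin_HEXAGON_S2_storew_locked(p, v)) { ... }
        // the comparison of the intrinsic's i32 result against zero folds away, and
        // the predicate defined by "memw_locked(r0,p0) = r1" is branched on
        // directly, with C2_not inserted only for the inverted test.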
   2946