Home | History | Annotate | Download | only in msa
      1 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
      2 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
      3 
      4 define void @and_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Loads the <16 x i8> vectors at %a and %b, ANDs them and stores the
          ; result to %c. Expected lowering: two ld.b, one and.v, one st.b.
      5   ; CHECK-LABEL: and_v16i8:
      6 
      7   %1 = load <16 x i8>, <16 x i8>* %a
      8   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
      9   %2 = load <16 x i8>, <16 x i8>* %b
     10   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
     11   %3 = and <16 x i8> %1, %2
     12   ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
     13   store <16 x i8> %3, <16 x i8>* %c
     14   ; CHECK-DAG: st.b [[R3]], 0($4)
     15 
     16   ret void
     17   ; CHECK: .size and_v16i8
     18 }
     19 
     20 define void @and_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Loads the <8 x i16> vectors at %a and %b, ANDs them and stores the
          ; result to %c. Expected lowering: two ld.h, one and.v, one st.h.
     21   ; CHECK-LABEL: and_v8i16:
     22 
     23   %1 = load <8 x i16>, <8 x i16>* %a
     24   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
     25   %2 = load <8 x i16>, <8 x i16>* %b
     26   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
     27   %3 = and <8 x i16> %1, %2
     28   ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
     29   store <8 x i16> %3, <8 x i16>* %c
     30   ; CHECK-DAG: st.h [[R3]], 0($4)
     31 
     32   ret void
     33   ; CHECK: .size and_v8i16
     34 }
     35 
     36 define void @and_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Loads the <4 x i32> vectors at %a and %b, ANDs them and stores the
          ; result to %c. Expected lowering: two ld.w, one and.v, one st.w.
     37   ; CHECK-LABEL: and_v4i32:
     38 
     39   %1 = load <4 x i32>, <4 x i32>* %a
     40   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
     41   %2 = load <4 x i32>, <4 x i32>* %b
     42   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
     43   %3 = and <4 x i32> %1, %2
     44   ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
     45   store <4 x i32> %3, <4 x i32>* %c
     46   ; CHECK-DAG: st.w [[R3]], 0($4)
     47 
     48   ret void
     49   ; CHECK: .size and_v4i32
     50 }
     51 
     52 define void @and_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Loads the <2 x i64> vectors at %a and %b, ANDs them and stores the
          ; result to %c. Expected lowering: two ld.d, one and.v, one st.d.
     53   ; CHECK-LABEL: and_v2i64:
     54 
     55   %1 = load <2 x i64>, <2 x i64>* %a
     56   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
     57   %2 = load <2 x i64>, <2 x i64>* %b
     58   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
     59   %3 = and <2 x i64> %1, %2
     60   ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
     61   store <2 x i64> %3, <2 x i64>* %c
     62   ; CHECK-DAG: st.d [[R3]], 0($4)
     63 
     64   ret void
     65   ; CHECK: .size and_v2i64
     66 }
     67 
     68 define void @and_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; ANDs the <16 x i8> vector at %a with a splat of 1 and stores the
          ; result to %c. The checks expect the immediate form andi.b.
     69   ; CHECK-LABEL: and_v16i8_i:
     70 
     71   %1 = load <16 x i8>, <16 x i8>* %a
     72   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
     73   %2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
     74   ; CHECK-DAG: andi.b [[R4:\$w[0-9]+]], [[R1]], 1
     75   store <16 x i8> %2, <16 x i8>* %c
     76   ; CHECK-DAG: st.b [[R4]], 0($4)
     77 
     78   ret void
     79   ; CHECK: .size and_v16i8_i
     80 }
     81 
     82 define void @and_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; ANDs the <8 x i16> vector at %a with a splat of 1 and stores the
          ; result to %c. The checks expect the splat to be built with ldi.h
          ; and combined using and.v.
     83   ; CHECK-LABEL: and_v8i16_i:
     84 
     85   %1 = load <8 x i16>, <8 x i16>* %a
     86   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
     87   %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
     88   ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
     89   ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
     90   store <8 x i16> %2, <8 x i16>* %c
     91   ; CHECK-DAG: st.h [[R4]], 0($4)
     92 
     93   ret void
     94   ; CHECK: .size and_v8i16_i
     95 }
     96 
     97 define void @and_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; ANDs the <4 x i32> vector at %a with a splat of 1 and stores the
          ; result to %c. The checks expect the splat to be built with ldi.w
          ; and combined using and.v.
     98   ; CHECK-LABEL: and_v4i32_i:
     99 
    100   %1 = load <4 x i32>, <4 x i32>* %a
    101   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    102   %2 = and <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
    103   ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
    104   ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
    105   store <4 x i32> %2, <4 x i32>* %c
    106   ; CHECK-DAG: st.w [[R4]], 0($4)
    107 
    108   ret void
    109   ; CHECK: .size and_v4i32_i
    110 }
    111 
    112 define void @and_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; ANDs the <2 x i64> vector at %a with a splat of 1 and stores the
          ; result to %c. The checks expect the splat to be built with ldi.d
          ; and combined using and.v.
    113   ; CHECK-LABEL: and_v2i64_i:
    114 
    115   %1 = load <2 x i64>, <2 x i64>* %a
    116   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    117   %2 = and <2 x i64> %1, <i64 1, i64 1>
    118   ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
    119   ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
    120   store <2 x i64> %2, <2 x i64>* %c
    121   ; CHECK-DAG: st.d [[R4]], 0($4)
    122 
    123   ret void
    124   ; CHECK: .size and_v2i64_i
    125 }
    126 
    127 define void @or_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Loads the <16 x i8> vectors at %a and %b, ORs them and stores the
          ; result to %c. Expected lowering: two ld.b, one or.v, one st.b.
    128   ; CHECK-LABEL: or_v16i8:
    129 
    130   %1 = load <16 x i8>, <16 x i8>* %a
    131   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    132   %2 = load <16 x i8>, <16 x i8>* %b
    133   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    134   %3 = or <16 x i8> %1, %2
    135   ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    136   store <16 x i8> %3, <16 x i8>* %c
    137   ; CHECK-DAG: st.b [[R3]], 0($4)
    138 
    139   ret void
    140   ; CHECK: .size or_v16i8
    141 }
    142 
    143 define void @or_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Loads the <8 x i16> vectors at %a and %b, ORs them and stores the
          ; result to %c. Expected lowering: two ld.h, one or.v, one st.h.
    144   ; CHECK-LABEL: or_v8i16:
    145 
    146   %1 = load <8 x i16>, <8 x i16>* %a
    147   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    148   %2 = load <8 x i16>, <8 x i16>* %b
    149   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    150   %3 = or <8 x i16> %1, %2
    151   ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    152   store <8 x i16> %3, <8 x i16>* %c
    153   ; CHECK-DAG: st.h [[R3]], 0($4)
    154 
    155   ret void
    156   ; CHECK: .size or_v8i16
    157 }
    158 
    159 define void @or_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Loads the <4 x i32> vectors at %a and %b, ORs them and stores the
          ; result to %c. Expected lowering: two ld.w, one or.v, one st.w.
    160   ; CHECK-LABEL: or_v4i32:
    161 
    162   %1 = load <4 x i32>, <4 x i32>* %a
    163   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    164   %2 = load <4 x i32>, <4 x i32>* %b
    165   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    166   %3 = or <4 x i32> %1, %2
    167   ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    168   store <4 x i32> %3, <4 x i32>* %c
    169   ; CHECK-DAG: st.w [[R3]], 0($4)
    170 
    171   ret void
    172   ; CHECK: .size or_v4i32
    173 }
    174 
    175 define void @or_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Loads the <2 x i64> vectors at %a and %b, ORs them and stores the
          ; result to %c. Expected lowering: two ld.d, one or.v, one st.d.
    176   ; CHECK-LABEL: or_v2i64:
    177 
    178   %1 = load <2 x i64>, <2 x i64>* %a
    179   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    180   %2 = load <2 x i64>, <2 x i64>* %b
    181   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    182   %3 = or <2 x i64> %1, %2
    183   ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    184   store <2 x i64> %3, <2 x i64>* %c
    185   ; CHECK-DAG: st.d [[R3]], 0($4)
    186 
    187   ret void
    188   ; CHECK: .size or_v2i64
    189 }
    190 
    191 define void @or_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; ORs the <16 x i8> vector at %a with a splat of 3 and stores the
          ; result to %c. The checks expect the immediate form ori.b.
    192   ; CHECK-LABEL: or_v16i8_i:
    193 
    194   %1 = load <16 x i8>, <16 x i8>* %a
    195   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    196   %2 = or <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    197   ; CHECK-DAG: ori.b [[R4:\$w[0-9]+]], [[R1]], 3
    198   store <16 x i8> %2, <16 x i8>* %c
    199   ; CHECK-DAG: st.b [[R4]], 0($4)
    200 
    201   ret void
    202   ; CHECK: .size or_v16i8_i
    203 }
    204 
    205 define void @or_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; ORs the <8 x i16> vector at %a with a splat of 3 and stores the
          ; result to %c. The checks expect the splat to be built with ldi.h
          ; and combined using or.v.
    206   ; CHECK-LABEL: or_v8i16_i:
    207 
    208   %1 = load <8 x i16>, <8 x i16>* %a
    209   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    210   %2 = or <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
    211   ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
    212   ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
    213   store <8 x i16> %2, <8 x i16>* %c
    214   ; CHECK-DAG: st.h [[R4]], 0($4)
    215 
    216   ret void
    217   ; CHECK: .size or_v8i16_i
    218 }
    219 
    220 define void @or_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; ORs the <4 x i32> vector at %a with a splat of 3 and stores the
          ; result to %c. The checks expect the splat to be built with ldi.w
          ; and combined using or.v.
    221   ; CHECK-LABEL: or_v4i32_i:
    222 
    223   %1 = load <4 x i32>, <4 x i32>* %a
    224   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    225   %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
    226   ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
    227   ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
    228   store <4 x i32> %2, <4 x i32>* %c
    229   ; CHECK-DAG: st.w [[R4]], 0($4)
    230 
    231   ret void
    232   ; CHECK: .size or_v4i32_i
    233 }
    234 
    235 define void @or_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; ORs the <2 x i64> vector at %a with a splat of 3 and stores the
          ; result to %c. The checks expect the splat to be built with ldi.d
          ; and combined using or.v.
    236   ; CHECK-LABEL: or_v2i64_i:
    237 
    238   %1 = load <2 x i64>, <2 x i64>* %a
    239   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    240   %2 = or <2 x i64> %1, <i64 3, i64 3>
    241   ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
    242   ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
    243   store <2 x i64> %2, <2 x i64>* %c
    244   ; CHECK-DAG: st.d [[R4]], 0($4)
    245 
    246   ret void
    247   ; CHECK: .size or_v2i64_i
    248 }
    249 
    250 define void @nor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; ORs the <16 x i8> vectors at %a and %b, inverts the result (xor with
          ; all-ones) and stores it to %c. The checks expect the or/xor pair to
          ; be selected as a single nor.v.
    251   ; CHECK-LABEL: nor_v16i8:
    252 
    253   %1 = load <16 x i8>, <16 x i8>* %a
    254   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    255   %2 = load <16 x i8>, <16 x i8>* %b
    256   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    257   %3 = or <16 x i8> %1, %2
    258   %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
    259   ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    260   store <16 x i8> %4, <16 x i8>* %c
    261   ; CHECK-DAG: st.b [[R3]], 0($4)
    262 
    263   ret void
    264   ; CHECK: .size nor_v16i8
    265 }
    266 
    267 define void @nor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; ORs the <8 x i16> vectors at %a and %b, inverts the result (xor with
          ; all-ones) and stores it to %c. The checks expect the or/xor pair to
          ; be selected as a single nor.v.
    268   ; CHECK-LABEL: nor_v8i16:
    269 
    270   %1 = load <8 x i16>, <8 x i16>* %a
    271   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    272   %2 = load <8 x i16>, <8 x i16>* %b
    273   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    274   %3 = or <8 x i16> %1, %2
    275   %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
    276   ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    277   store <8 x i16> %4, <8 x i16>* %c
    278   ; CHECK-DAG: st.h [[R3]], 0($4)
    279 
    280   ret void
    281   ; CHECK: .size nor_v8i16
    282 }
    283 
    284 define void @nor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; ORs the <4 x i32> vectors at %a and %b, inverts the result (xor with
          ; all-ones) and stores it to %c. The checks expect the or/xor pair to
          ; be selected as a single nor.v.
    285   ; CHECK-LABEL: nor_v4i32:
    286 
    287   %1 = load <4 x i32>, <4 x i32>* %a
    288   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    289   %2 = load <4 x i32>, <4 x i32>* %b
    290   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    291   %3 = or <4 x i32> %1, %2
    292   %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
    293   ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    294   store <4 x i32> %4, <4 x i32>* %c
    295   ; CHECK-DAG: st.w [[R3]], 0($4)
    296 
    297   ret void
    298   ; CHECK: .size nor_v4i32
    299 }
    300 
    301 define void @nor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; ORs the <2 x i64> vectors at %a and %b, inverts the result (xor with
          ; all-ones) and stores it to %c. The checks expect the or/xor pair to
          ; be selected as a single nor.v.
    302   ; CHECK-LABEL: nor_v2i64:
    303 
    304   %1 = load <2 x i64>, <2 x i64>* %a
    305   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    306   %2 = load <2 x i64>, <2 x i64>* %b
    307   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    308   %3 = or <2 x i64> %1, %2
    309   %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
    310   ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    311   store <2 x i64> %4, <2 x i64>* %c
    312   ; CHECK-DAG: st.d [[R3]], 0($4)
    313 
    314   ret void
    315   ; CHECK: .size nor_v2i64
    316 }
    317 
    318 define void @nor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; ORs the <16 x i8> vector at %a with a splat of 1, inverts the result
          ; (xor with all-ones) and stores it to %c. The checks expect the
          ; immediate NOR form. The mnemonic is spelled out in full as nori.b:
          ; the shorter pattern "ori.b" also matches inside "nori.b", so it
          ; could not tell a plain OR-immediate apart from the required NOR.
    319   ; CHECK-LABEL: nor_v16i8_i:
    320 
    321   %1 = load <16 x i8>, <16 x i8>* %a
    322   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    323   %2 = or <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    324   %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
    325   ; CHECK-DAG: nori.b [[R4:\$w[0-9]+]], [[R1]], 1
    326   store <16 x i8> %3, <16 x i8>* %c
    327   ; CHECK-DAG: st.b [[R4]], 0($4)
    328 
    329   ret void
    330   ; CHECK: .size nor_v16i8_i
    331 }
    332 
    333 define void @nor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; ORs the <8 x i16> vector at %a with a splat of 1, inverts the result
          ; (xor with all-ones) and stores it to %c. The checks expect the splat
          ; to be built with ldi.h and combined using nor.v.
    334   ; CHECK-LABEL: nor_v8i16_i:
    335 
    336   %1 = load <8 x i16>, <8 x i16>* %a
    337   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    338   %2 = or <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    339   %3 = xor <8 x i16> %2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
    340   ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
    341   ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
    342   store <8 x i16> %3, <8 x i16>* %c
    343   ; CHECK-DAG: st.h [[R4]], 0($4)
    344 
    345   ret void
    346   ; CHECK: .size nor_v8i16_i
    347 }
    348 
    349 define void @nor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; ORs the <4 x i32> vector at %a with a splat of 1, inverts the result
          ; (xor with all-ones) and stores it to %c. The checks expect the splat
          ; to be built with ldi.w and combined using nor.v.
    350   ; CHECK-LABEL: nor_v4i32_i:
    351 
    352   %1 = load <4 x i32>, <4 x i32>* %a
    353   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    354   %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
    355   %3 = xor <4 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1>
    356   ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
    357   ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
    358   store <4 x i32> %3, <4 x i32>* %c
    359   ; CHECK-DAG: st.w [[R4]], 0($4)
    360 
    361   ret void
    362   ; CHECK: .size nor_v4i32_i
    363 }
    364 
    365 define void @nor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; ORs the <2 x i64> vector at %a with a splat of 1, inverts the result
          ; (xor with all-ones) and stores it to %c. The checks expect the splat
          ; to be built with ldi.d and combined using nor.v.
    366   ; CHECK-LABEL: nor_v2i64_i:
    367 
    368   %1 = load <2 x i64>, <2 x i64>* %a
    369   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    370   %2 = or <2 x i64> %1, <i64 1, i64 1>
    371   %3 = xor <2 x i64> %2, <i64 -1, i64 -1>
    372   ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
    373   ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
    374   store <2 x i64> %3, <2 x i64>* %c
    375   ; CHECK-DAG: st.d [[R4]], 0($4)
    376 
    377   ret void
    378   ; CHECK: .size nor_v2i64_i
    379 }
    380 
    381 define void @xor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Loads the <16 x i8> vectors at %a and %b, XORs them and stores the
          ; result to %c. Expected lowering: two ld.b, one xor.v, one st.b.
    382   ; CHECK-LABEL: xor_v16i8:
    383 
    384   %1 = load <16 x i8>, <16 x i8>* %a
    385   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    386   %2 = load <16 x i8>, <16 x i8>* %b
    387   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    388   %3 = xor <16 x i8> %1, %2
    389   ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    390   store <16 x i8> %3, <16 x i8>* %c
    391   ; CHECK-DAG: st.b [[R3]], 0($4)
    392 
    393   ret void
    394   ; CHECK: .size xor_v16i8
    395 }
    396 
    397 define void @xor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Loads the <8 x i16> vectors at %a and %b, XORs them and stores the
          ; result to %c. Expected lowering: two ld.h, one xor.v, one st.h.
    398   ; CHECK-LABEL: xor_v8i16:
    399 
    400   %1 = load <8 x i16>, <8 x i16>* %a
    401   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    402   %2 = load <8 x i16>, <8 x i16>* %b
    403   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    404   %3 = xor <8 x i16> %1, %2
    405   ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    406   store <8 x i16> %3, <8 x i16>* %c
    407   ; CHECK-DAG: st.h [[R3]], 0($4)
    408 
    409   ret void
    410   ; CHECK: .size xor_v8i16
    411 }
    412 
    413 define void @xor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Loads the <4 x i32> vectors at %a and %b, XORs them and stores the
          ; result to %c. Expected lowering: two ld.w, one xor.v, one st.w.
    414   ; CHECK-LABEL: xor_v4i32:
    415 
    416   %1 = load <4 x i32>, <4 x i32>* %a
    417   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    418   %2 = load <4 x i32>, <4 x i32>* %b
    419   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    420   %3 = xor <4 x i32> %1, %2
    421   ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    422   store <4 x i32> %3, <4 x i32>* %c
    423   ; CHECK-DAG: st.w [[R3]], 0($4)
    424 
    425   ret void
    426   ; CHECK: .size xor_v4i32
    427 }
    428 
    429 define void @xor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Loads the <2 x i64> vectors at %a and %b, XORs them and stores the
          ; result to %c. Expected lowering: two ld.d, one xor.v, one st.d.
    430   ; CHECK-LABEL: xor_v2i64:
    431 
    432   %1 = load <2 x i64>, <2 x i64>* %a
    433   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    434   %2 = load <2 x i64>, <2 x i64>* %b
    435   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    436   %3 = xor <2 x i64> %1, %2
    437   ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    438   store <2 x i64> %3, <2 x i64>* %c
    439   ; CHECK-DAG: st.d [[R3]], 0($4)
    440 
    441   ret void
    442   ; CHECK: .size xor_v2i64
    443 }
    444 
    445 define void @xor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; XORs the <16 x i8> vector at %a with a splat of 3 and stores the
          ; result to %c. The checks expect the immediate form xori.b.
    446   ; CHECK-LABEL: xor_v16i8_i:
    447 
    448   %1 = load <16 x i8>, <16 x i8>* %a
    449   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    450   %2 = xor <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    451   ; CHECK-DAG: xori.b [[R4:\$w[0-9]+]], [[R1]], 3
    452   store <16 x i8> %2, <16 x i8>* %c
    453   ; CHECK-DAG: st.b [[R4]], 0($4)
    454 
    455   ret void
    456   ; CHECK: .size xor_v16i8_i
    457 }
    458 
    459 define void @xor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; XORs the <8 x i16> vector at %a with a splat of 3 and stores the
          ; result to %c. The checks expect the splat to be built with ldi.h
          ; and combined using xor.v.
    460   ; CHECK-LABEL: xor_v8i16_i:
    461 
    462   %1 = load <8 x i16>, <8 x i16>* %a
    463   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    464   %2 = xor <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
    465   ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
    466   ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
    467   store <8 x i16> %2, <8 x i16>* %c
    468   ; CHECK-DAG: st.h [[R4]], 0($4)
    469 
    470   ret void
    471   ; CHECK: .size xor_v8i16_i
    472 }
    473 
    474 define void @xor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; XORs the <4 x i32> vector at %a with a splat of 3 and stores the
          ; result to %c. The checks expect the splat to be built with ldi.w
          ; and combined using xor.v.
    475   ; CHECK-LABEL: xor_v4i32_i:
    476 
    477   %1 = load <4 x i32>, <4 x i32>* %a
    478   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    479   %2 = xor <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
    480   ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
    481   ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
    482   store <4 x i32> %2, <4 x i32>* %c
    483   ; CHECK-DAG: st.w [[R4]], 0($4)
    484 
    485   ret void
    486   ; CHECK: .size xor_v4i32_i
    487 }
    488 
    489 define void @xor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; XORs the <2 x i64> vector at %a with a splat of 3 and stores the
          ; result to %c. The checks expect the splat to be built with ldi.d
          ; and combined using xor.v.
    490   ; CHECK-LABEL: xor_v2i64_i:
    491 
    492   %1 = load <2 x i64>, <2 x i64>* %a
    493   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    494   %2 = xor <2 x i64> %1, <i64 3, i64 3>
    495   ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
    496   ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
    497   store <2 x i64> %2, <2 x i64>* %c
    498   ; CHECK-DAG: st.d [[R4]], 0($4)
    499 
    500   ret void
    501   ; CHECK: .size xor_v2i64_i
    502 }
    503 
    504 define void @sll_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Shifts each element of the <16 x i8> vector at %a left by the
          ; matching element of the vector at %b (shl) and stores the result to
          ; %c. The checks expect a single sll.b.
    505   ; CHECK-LABEL: sll_v16i8:
    506 
    507   %1 = load <16 x i8>, <16 x i8>* %a
    508   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    509   %2 = load <16 x i8>, <16 x i8>* %b
    510   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    511   %3 = shl <16 x i8> %1, %2
    512   ; CHECK-DAG: sll.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    513   store <16 x i8> %3, <16 x i8>* %c
    514   ; CHECK-DAG: st.b [[R3]], 0($4)
    515 
    516   ret void
    517   ; CHECK: .size sll_v16i8
    518 }
    519 
    520 define void @sll_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Shifts each element of the <8 x i16> vector at %a left by the
          ; matching element of the vector at %b (shl) and stores the result to
          ; %c. The checks expect a single sll.h.
    521   ; CHECK-LABEL: sll_v8i16:
    522 
    523   %1 = load <8 x i16>, <8 x i16>* %a
    524   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    525   %2 = load <8 x i16>, <8 x i16>* %b
    526   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    527   %3 = shl <8 x i16> %1, %2
    528   ; CHECK-DAG: sll.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    529   store <8 x i16> %3, <8 x i16>* %c
    530   ; CHECK-DAG: st.h [[R3]], 0($4)
    531 
    532   ret void
    533   ; CHECK: .size sll_v8i16
    534 }
    535 
    536 define void @sll_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Shifts each element of the <4 x i32> vector at %a left by the
          ; matching element of the vector at %b (shl) and stores the result to
          ; %c. The checks expect a single sll.w.
    537   ; CHECK-LABEL: sll_v4i32:
    538 
    539   %1 = load <4 x i32>, <4 x i32>* %a
    540   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    541   %2 = load <4 x i32>, <4 x i32>* %b
    542   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    543   %3 = shl <4 x i32> %1, %2
    544   ; CHECK-DAG: sll.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    545   store <4 x i32> %3, <4 x i32>* %c
    546   ; CHECK-DAG: st.w [[R3]], 0($4)
    547 
    548   ret void
    549   ; CHECK: .size sll_v4i32
    550 }
    551 
    552 define void @sll_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Shifts each element of the <2 x i64> vector at %a left by the
          ; matching element of the vector at %b (shl) and stores the result to
          ; %c. The checks expect a single sll.d.
    553   ; CHECK-LABEL: sll_v2i64:
    554 
    555   %1 = load <2 x i64>, <2 x i64>* %a
    556   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    557   %2 = load <2 x i64>, <2 x i64>* %b
    558   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    559   %3 = shl <2 x i64> %1, %2
    560   ; CHECK-DAG: sll.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    561   store <2 x i64> %3, <2 x i64>* %c
    562   ; CHECK-DAG: st.d [[R3]], 0($4)
    563 
    564   ret void
    565   ; CHECK: .size sll_v2i64
    566 }
    567 
    568 define void @sll_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Shifts every element of the <16 x i8> vector at %a left by the
          ; constant 1 and stores the result to %c. The checks expect the
          ; immediate form slli.b.
    569   ; CHECK-LABEL: sll_v16i8_i:
    570 
    571   %1 = load <16 x i8>, <16 x i8>* %a
    572   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    573   %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    574   ; CHECK-DAG: slli.b [[R4:\$w[0-9]+]], [[R1]], 1
    575   store <16 x i8> %2, <16 x i8>* %c
    576   ; CHECK-DAG: st.b [[R4]], 0($4)
    577 
    578   ret void
    579   ; CHECK: .size sll_v16i8_i
    580 }
    581 
    582 define void @sll_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Shifts every element of the <8 x i16> vector at %a left by the
          ; constant 1 and stores the result to %c. The checks expect the
          ; immediate form slli.h.
    583   ; CHECK-LABEL: sll_v8i16_i:
    584 
    585   %1 = load <8 x i16>, <8 x i16>* %a
    586   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    587   %2 = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    588   ; CHECK-DAG: slli.h [[R4:\$w[0-9]+]], [[R1]], 1
    589   store <8 x i16> %2, <8 x i16>* %c
    590   ; CHECK-DAG: st.h [[R4]], 0($4)
    591 
    592   ret void
    593   ; CHECK: .size sll_v8i16_i
    594 }
    595 
    596 define void @sll_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Shifts every element of the <4 x i32> vector at %a left by the
          ; constant 1 and stores the result to %c. The checks expect the
          ; immediate form slli.w.
    597   ; CHECK-LABEL: sll_v4i32_i:
    598 
    599   %1 = load <4 x i32>, <4 x i32>* %a
    600   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    601   %2 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
    602   ; CHECK-DAG: slli.w [[R4:\$w[0-9]+]], [[R1]], 1
    603   store <4 x i32> %2, <4 x i32>* %c
    604   ; CHECK-DAG: st.w [[R4]], 0($4)
    605 
    606   ret void
    607   ; CHECK: .size sll_v4i32_i
    608 }
    609 
    610 define void @sll_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Shifts every element of the <2 x i64> vector at %a left by the
          ; constant 1 and stores the result to %c. The checks expect the
          ; immediate form slli.d.
    611   ; CHECK-LABEL: sll_v2i64_i:
    612 
    613   %1 = load <2 x i64>, <2 x i64>* %a
    614   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    615   %2 = shl <2 x i64> %1, <i64 1, i64 1>
    616   ; CHECK-DAG: slli.d [[R4:\$w[0-9]+]], [[R1]], 1
    617   store <2 x i64> %2, <2 x i64>* %c
    618   ; CHECK-DAG: st.d [[R4]], 0($4)
    619 
    620   ret void
    621   ; CHECK: .size sll_v2i64_i
    622 }
    623 
    624 define void @sra_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Arithmetic-shifts each element of the <16 x i8> vector at %a right
          ; by the matching element of the vector at %b (ashr) and stores the
          ; result to %c. The checks expect a single sra.b.
    625   ; CHECK-LABEL: sra_v16i8:
    626 
    627   %1 = load <16 x i8>, <16 x i8>* %a
    628   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    629   %2 = load <16 x i8>, <16 x i8>* %b
    630   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    631   %3 = ashr <16 x i8> %1, %2
    632   ; CHECK-DAG: sra.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    633   store <16 x i8> %3, <16 x i8>* %c
    634   ; CHECK-DAG: st.b [[R3]], 0($4)
    635 
    636   ret void
    637   ; CHECK: .size sra_v16i8
    638 }
    639 
    640 define void @sra_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Arithmetic-shifts each element of the <8 x i16> vector at %a right
          ; by the matching element of the vector at %b (ashr) and stores the
          ; result to %c. The checks expect a single sra.h.
    641   ; CHECK-LABEL: sra_v8i16:
    642 
    643   %1 = load <8 x i16>, <8 x i16>* %a
    644   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    645   %2 = load <8 x i16>, <8 x i16>* %b
    646   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    647   %3 = ashr <8 x i16> %1, %2
    648   ; CHECK-DAG: sra.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    649   store <8 x i16> %3, <8 x i16>* %c
    650   ; CHECK-DAG: st.h [[R3]], 0($4)
    651 
    652   ret void
    653   ; CHECK: .size sra_v8i16
    654 }
    655 
    656 define void @sra_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Arithmetic-shifts each element of the <4 x i32> vector at %a right
          ; by the matching element of the vector at %b (ashr) and stores the
          ; result to %c. The checks expect a single sra.w.
    657   ; CHECK-LABEL: sra_v4i32:
    658 
    659   %1 = load <4 x i32>, <4 x i32>* %a
    660   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    661   %2 = load <4 x i32>, <4 x i32>* %b
    662   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    663   %3 = ashr <4 x i32> %1, %2
    664   ; CHECK-DAG: sra.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    665   store <4 x i32> %3, <4 x i32>* %c
    666   ; CHECK-DAG: st.w [[R3]], 0($4)
    667 
    668   ret void
    669   ; CHECK: .size sra_v4i32
    670 }
    671 
    672 define void @sra_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Arithmetic-shifts each element of the <2 x i64> vector at %a right
          ; by the matching element of the vector at %b (ashr) and stores the
          ; result to %c. The checks expect a single sra.d.
    673   ; CHECK-LABEL: sra_v2i64:
    674 
    675   %1 = load <2 x i64>, <2 x i64>* %a
    676   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    677   %2 = load <2 x i64>, <2 x i64>* %b
    678   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    679   %3 = ashr <2 x i64> %1, %2
    680   ; CHECK-DAG: sra.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    681   store <2 x i64> %3, <2 x i64>* %c
    682   ; CHECK-DAG: st.d [[R3]], 0($4)
    683 
    684   ret void
    685   ; CHECK: .size sra_v2i64
    686 }
    687 
    688 define void @sra_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Arithmetic-shifts every element of the <16 x i8> vector at %a right
          ; by the constant 1 and stores the result to %c. The checks expect the
          ; immediate form srai.b.
    689   ; CHECK-LABEL: sra_v16i8_i:
    690 
    691   %1 = load <16 x i8>, <16 x i8>* %a
    692   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    693   %2 = ashr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    694   ; CHECK-DAG: srai.b [[R4:\$w[0-9]+]], [[R1]], 1
    695   store <16 x i8> %2, <16 x i8>* %c
    696   ; CHECK-DAG: st.b [[R4]], 0($4)
    697 
    698   ret void
    699   ; CHECK: .size sra_v16i8_i
    700 }
    701 
    702 define void @sra_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Arithmetic-shifts every element of the <8 x i16> vector at %a right
          ; by the constant 1 and stores the result to %c. The checks expect the
          ; immediate form srai.h.
    703   ; CHECK-LABEL: sra_v8i16_i:
    704 
    705   %1 = load <8 x i16>, <8 x i16>* %a
    706   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    707   %2 = ashr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    708   ; CHECK-DAG: srai.h [[R4:\$w[0-9]+]], [[R1]], 1
    709   store <8 x i16> %2, <8 x i16>* %c
    710   ; CHECK-DAG: st.h [[R4]], 0($4)
    711 
    712   ret void
    713   ; CHECK: .size sra_v8i16_i
    714 }
    715 
    716 define void @sra_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Arithmetic-shifts every element of the <4 x i32> vector at %a right
          ; by the constant 1 and stores the result to %c. The checks expect the
          ; immediate form srai.w.
    717   ; CHECK-LABEL: sra_v4i32_i:
    718 
    719   %1 = load <4 x i32>, <4 x i32>* %a
    720   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    721   %2 = ashr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
    722   ; CHECK-DAG: srai.w [[R4:\$w[0-9]+]], [[R1]], 1
    723   store <4 x i32> %2, <4 x i32>* %c
    724   ; CHECK-DAG: st.w [[R4]], 0($4)
    725 
    726   ret void
    727   ; CHECK: .size sra_v4i32_i
    728 }
    729 
    730 define void @sra_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Arithmetic-shifts every element of the <2 x i64> vector at %a right
          ; by the constant 1 and stores the result to %c. The checks expect the
          ; immediate form srai.d.
    731   ; CHECK-LABEL: sra_v2i64_i:
    732 
    733   %1 = load <2 x i64>, <2 x i64>* %a
    734   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    735   %2 = ashr <2 x i64> %1, <i64 1, i64 1>
    736   ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R1]], 1
    737   store <2 x i64> %2, <2 x i64>* %c
    738   ; CHECK-DAG: st.d [[R4]], 0($4)
    739 
    740   ret void
    741   ; CHECK: .size sra_v2i64_i
    742 }
    743 
    744 define void @srl_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Logical-shifts each element of the <16 x i8> vector at %a right by
          ; the matching element of the vector at %b (lshr) and stores the
          ; result to %c. The checks expect a single srl.b.
    745   ; CHECK-LABEL: srl_v16i8:
    746 
    747   %1 = load <16 x i8>, <16 x i8>* %a
    748   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    749   %2 = load <16 x i8>, <16 x i8>* %b
    750   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    751   %3 = lshr <16 x i8> %1, %2
    752   ; CHECK-DAG: srl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    753   store <16 x i8> %3, <16 x i8>* %c
    754   ; CHECK-DAG: st.b [[R3]], 0($4)
    755 
    756   ret void
    757   ; CHECK: .size srl_v16i8
    758 }
    759 
    760 define void @srl_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Logical-shifts each element of the <8 x i16> vector at %a right by
          ; the matching element of the vector at %b (lshr) and stores the
          ; result to %c. The checks expect a single srl.h.
    761   ; CHECK-LABEL: srl_v8i16:
    762 
    763   %1 = load <8 x i16>, <8 x i16>* %a
    764   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    765   %2 = load <8 x i16>, <8 x i16>* %b
    766   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    767   %3 = lshr <8 x i16> %1, %2
    768   ; CHECK-DAG: srl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    769   store <8 x i16> %3, <8 x i16>* %c
    770   ; CHECK-DAG: st.h [[R3]], 0($4)
    771 
    772   ret void
    773   ; CHECK: .size srl_v8i16
    774 }
    775 
    776 define void @srl_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Logical-shifts each element of the <4 x i32> vector at %a right by
          ; the matching element of the vector at %b (lshr) and stores the
          ; result to %c. The checks expect a single srl.w.
    777   ; CHECK-LABEL: srl_v4i32:
    778 
    779   %1 = load <4 x i32>, <4 x i32>* %a
    780   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    781   %2 = load <4 x i32>, <4 x i32>* %b
    782   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    783   %3 = lshr <4 x i32> %1, %2
    784   ; CHECK-DAG: srl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    785   store <4 x i32> %3, <4 x i32>* %c
    786   ; CHECK-DAG: st.w [[R3]], 0($4)
    787 
    788   ret void
    789   ; CHECK: .size srl_v4i32
    790 }
    791 
    792 define void @srl_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Logical-shifts each element of the <2 x i64> vector at %a right by
          ; the matching element of the vector at %b (lshr) and stores the
          ; result to %c. The checks expect a single srl.d.
    793   ; CHECK-LABEL: srl_v2i64:
    794 
    795   %1 = load <2 x i64>, <2 x i64>* %a
    796   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    797   %2 = load <2 x i64>, <2 x i64>* %b
    798   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    799   %3 = lshr <2 x i64> %1, %2
    800   ; CHECK-DAG: srl.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    801   store <2 x i64> %3, <2 x i64>* %c
    802   ; CHECK-DAG: st.d [[R3]], 0($4)
    803 
    804   ret void
    805   ; CHECK: .size srl_v2i64
    806 }
    807 
    808 define void @srl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Logical-shifts every element of the <16 x i8> vector at %a right by
          ; the constant 1 and stores the result to %c. The checks expect the
          ; immediate form srli.b.
    809   ; CHECK-LABEL: srl_v16i8_i:
    810 
    811   %1 = load <16 x i8>, <16 x i8>* %a
    812   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    813   %2 = lshr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    814   ; CHECK-DAG: srli.b [[R4:\$w[0-9]+]], [[R1]], 1
    815   store <16 x i8> %2, <16 x i8>* %c
    816   ; CHECK-DAG: st.b [[R4]], 0($4)
    817 
    818   ret void
    819   ; CHECK: .size srl_v16i8_i
    820 }
    821 
    822 define void @srl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Logical-shifts every element of the <8 x i16> vector at %a right by
          ; the constant 1 and stores the result to %c. The checks expect the
          ; immediate form srli.h.
    823   ; CHECK-LABEL: srl_v8i16_i:
    824 
    825   %1 = load <8 x i16>, <8 x i16>* %a
    826   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    827   %2 = lshr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    828   ; CHECK-DAG: srli.h [[R4:\$w[0-9]+]], [[R1]], 1
    829   store <8 x i16> %2, <8 x i16>* %c
    830   ; CHECK-DAG: st.h [[R4]], 0($4)
    831 
    832   ret void
    833   ; CHECK: .size srl_v8i16_i
    834 }
    835 
    836 define void @srl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Logical-shifts every element of the <4 x i32> vector at %a right by
          ; the constant 1 and stores the result to %c. The checks expect the
          ; immediate form srli.w.
    837   ; CHECK-LABEL: srl_v4i32_i:
    838 
    839   %1 = load <4 x i32>, <4 x i32>* %a
    840   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    841   %2 = lshr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
    842   ; CHECK-DAG: srli.w [[R4:\$w[0-9]+]], [[R1]], 1
    843   store <4 x i32> %2, <4 x i32>* %c
    844   ; CHECK-DAG: st.w [[R4]], 0($4)
    845 
    846   ret void
    847   ; CHECK: .size srl_v4i32_i
    848 }
    849 
    850 define void @srl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Logical right shift of both <2 x i64> lanes loaded from %a by the
          ; constant 1, stored to %c. The checks below expect the immediate-form
          ; shift srli.d with amount 1.
    851   ; CHECK: srl_v2i64_i:
    852 
    853   %1 = load <2 x i64>, <2 x i64>* %a
    854   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    855   %2 = lshr <2 x i64> %1, <i64 1, i64 1>
    856   ; CHECK-DAG: srli.d [[R4:\$w[0-9]+]], [[R1]], 1
    857   store <2 x i64> %2, <2 x i64>* %c
    858   ; CHECK-DAG: st.d [[R4]], 0($4)
    859 
    860   ret void
    861   ; CHECK: .size srl_v2i64_i
    862 }
    863 
    864 define void @ctpop_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Applies the llvm.ctpop.v16i8 intrinsic (per-lane population count) to
          ; the vector loaded from %a and stores the result to %c. The checks
          ; below expect the call to be selected as pcnt.b.
    865   ; CHECK: ctpop_v16i8:
    866 
    867   %1 = load <16 x i8>, <16 x i8>* %a
    868   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    869   %2 = tail call <16 x i8> @llvm.ctpop.v16i8 (<16 x i8> %1)
    870   ; CHECK-DAG: pcnt.b [[R3:\$w[0-9]+]], [[R1]]
    871   store <16 x i8> %2, <16 x i8>* %c
    872   ; CHECK-DAG: st.b [[R3]], 0($4)
    873 
    874   ret void
    875   ; CHECK: .size ctpop_v16i8
    876 }
    877 
    878 define void @ctpop_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Applies the llvm.ctpop.v8i16 intrinsic (per-lane population count) to
          ; the vector loaded from %a and stores the result to %c. The checks
          ; below expect the call to be selected as pcnt.h.
    879   ; CHECK: ctpop_v8i16:
    880 
    881   %1 = load <8 x i16>, <8 x i16>* %a
    882   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    883   %2 = tail call <8 x i16> @llvm.ctpop.v8i16 (<8 x i16> %1)
    884   ; CHECK-DAG: pcnt.h [[R3:\$w[0-9]+]], [[R1]]
    885   store <8 x i16> %2, <8 x i16>* %c
    886   ; CHECK-DAG: st.h [[R3]], 0($4)
    887 
    888   ret void
    889   ; CHECK: .size ctpop_v8i16
    890 }
    891 
    892 define void @ctpop_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Applies the llvm.ctpop.v4i32 intrinsic (per-lane population count) to
          ; the vector loaded from %a and stores the result to %c. The checks
          ; below expect the call to be selected as pcnt.w.
    893   ; CHECK: ctpop_v4i32:
    894 
    895   %1 = load <4 x i32>, <4 x i32>* %a
    896   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    897   %2 = tail call <4 x i32> @llvm.ctpop.v4i32 (<4 x i32> %1)
    898   ; CHECK-DAG: pcnt.w [[R3:\$w[0-9]+]], [[R1]]
    899   store <4 x i32> %2, <4 x i32>* %c
    900   ; CHECK-DAG: st.w [[R3]], 0($4)
    901 
    902   ret void
    903   ; CHECK: .size ctpop_v4i32
    904 }
    905 
    906 define void @ctpop_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Applies the llvm.ctpop.v2i64 intrinsic (per-lane population count) to
          ; the vector loaded from %a and stores the result to %c. The checks
          ; below expect the call to be selected as pcnt.d.
    907   ; CHECK: ctpop_v2i64:
    908 
    909   %1 = load <2 x i64>, <2 x i64>* %a
    910   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    911   %2 = tail call <2 x i64> @llvm.ctpop.v2i64 (<2 x i64> %1)
    912   ; CHECK-DAG: pcnt.d [[R3:\$w[0-9]+]], [[R1]]
    913   store <2 x i64> %2, <2 x i64>* %c
    914   ; CHECK-DAG: st.d [[R3]], 0($4)
    915 
    916   ret void
    917   ; CHECK: .size ctpop_v2i64
    918 }
    919 
    920 define void @ctlz_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Applies the llvm.ctlz.v16i8 intrinsic (per-lane leading-zero count) to
          ; the vector loaded from %a and stores the result to %c. The checks
          ; below expect the call to be selected as nlzc.b.
          ; NOTE(review): the intrinsic is called with a single operand here;
          ; presumably this relies on the IR reader accepting/upgrading the
          ; one-operand form -- confirm before porting to a newer LLVM.
    921   ; CHECK: ctlz_v16i8:
    922 
    923   %1 = load <16 x i8>, <16 x i8>* %a
    924   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    925   %2 = tail call <16 x i8> @llvm.ctlz.v16i8 (<16 x i8> %1)
    926   ; CHECK-DAG: nlzc.b [[R3:\$w[0-9]+]], [[R1]]
    927   store <16 x i8> %2, <16 x i8>* %c
    928   ; CHECK-DAG: st.b [[R3]], 0($4)
    929 
    930   ret void
    931   ; CHECK: .size ctlz_v16i8
    932 }
    933 
    934 define void @ctlz_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Applies the llvm.ctlz.v8i16 intrinsic (per-lane leading-zero count) to
          ; the vector loaded from %a and stores the result to %c. The checks
          ; below expect the call to be selected as nlzc.h.
          ; NOTE(review): the intrinsic is called with a single operand here;
          ; presumably this relies on the IR reader accepting/upgrading the
          ; one-operand form -- confirm before porting to a newer LLVM.
    935   ; CHECK: ctlz_v8i16:
    936 
    937   %1 = load <8 x i16>, <8 x i16>* %a
    938   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    939   %2 = tail call <8 x i16> @llvm.ctlz.v8i16 (<8 x i16> %1)
    940   ; CHECK-DAG: nlzc.h [[R3:\$w[0-9]+]], [[R1]]
    941   store <8 x i16> %2, <8 x i16>* %c
    942   ; CHECK-DAG: st.h [[R3]], 0($4)
    943 
    944   ret void
    945   ; CHECK: .size ctlz_v8i16
    946 }
    947 
    948 define void @ctlz_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Applies the llvm.ctlz.v4i32 intrinsic (per-lane leading-zero count) to
          ; the vector loaded from %a and stores the result to %c. The checks
          ; below expect the call to be selected as nlzc.w.
          ; NOTE(review): the intrinsic is called with a single operand here;
          ; presumably this relies on the IR reader accepting/upgrading the
          ; one-operand form -- confirm before porting to a newer LLVM.
    949   ; CHECK: ctlz_v4i32:
    950 
    951   %1 = load <4 x i32>, <4 x i32>* %a
    952   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    953   %2 = tail call <4 x i32> @llvm.ctlz.v4i32 (<4 x i32> %1)
    954   ; CHECK-DAG: nlzc.w [[R3:\$w[0-9]+]], [[R1]]
    955   store <4 x i32> %2, <4 x i32>* %c
    956   ; CHECK-DAG: st.w [[R3]], 0($4)
    957 
    958   ret void
    959   ; CHECK: .size ctlz_v4i32
    960 }
    961 
    962 define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Applies the llvm.ctlz.v2i64 intrinsic (per-lane leading-zero count) to
          ; the vector loaded from %a and stores the result to %c. The checks
          ; below expect the call to be selected as nlzc.d.
          ; NOTE(review): the intrinsic is called with a single operand here;
          ; presumably this relies on the IR reader accepting/upgrading the
          ; one-operand form -- confirm before porting to a newer LLVM.
    963   ; CHECK: ctlz_v2i64:
    964 
    965   %1 = load <2 x i64>, <2 x i64>* %a
    966   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    967   %2 = tail call <2 x i64> @llvm.ctlz.v2i64 (<2 x i64> %1)
    968   ; CHECK-DAG: nlzc.d [[R3:\$w[0-9]+]], [[R1]]
    969   store <2 x i64> %2, <2 x i64>* %c
    970   ; CHECK-DAG: st.d [[R3]], 0($4)
    971 
    972   ret void
    973   ; CHECK: .size ctlz_v2i64
    974 }
    975 
    976 define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %m) nounwind {
          ; Bitwise select with a variable mask: computes
          ;   (%a & mask) | (%b & ~mask)
          ; where mask is loaded from %m, and stores the result to %c.
          ; The checks below expect bmnz.v to overwrite the register holding %b's
          ; value, and that same register to be stored.
    977   ; CHECK: bsel_v16i8:
    978 
    979   %1 = load <16 x i8>, <16 x i8>* %a
    980   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    981   %2 = load <16 x i8>, <16 x i8>* %b
    982   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    983   %3 = load <16 x i8>, <16 x i8>* %m
    984   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
          ; %4 is the bitwise complement of the mask (xor with all-ones).
    985   %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1,
    986                           i8 -1, i8 -1, i8 -1, i8 -1,
    987                           i8 -1, i8 -1, i8 -1, i8 -1,
    988                           i8 -1, i8 -1, i8 -1, i8 -1>
    989   %5 = and <16 x i8> %1, %3
    990   %6 = and <16 x i8> %2, %4
    991   %7 = or <16 x i8> %5, %6
    992   ; bmnz is the same operation
    993   ; (vselect Mask, IfSet, IfClr) -> (BMNZ IfClr, IfSet, Mask)
    994   ; CHECK-DAG: bmnz.v [[R2]], [[R1]], [[R3]]
    995   store <16 x i8> %7, <16 x i8>* %c
    996   ; CHECK-DAG: st.b [[R2]], 0($4)
    997 
    998   ret void
    999   ; CHECK: .size bsel_v16i8
   1000 }
   1001 
   1002 define void @bsel_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %m) nounwind {
          ; Bitwise select between a loaded vector and a constant: computes
          ;   (%a & ~mask) | (6 & mask)
          ; where mask is loaded from %m, and stores the result to %c.
          ; The checks below expect bseli.b with immediate 6, using the mask
          ; register as the destination that is then stored.
   1003   ; CHECK: bsel_v16i8_i:
   1004 
   1005   %1 = load <16 x i8>, <16 x i8>* %a
   1006   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1007   %2 = load <16 x i8>, <16 x i8>* %m
   1008   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($6)
          ; %3 is the bitwise complement of the mask (xor with all-ones).
   1009   %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1,
   1010                           i8 -1, i8 -1, i8 -1, i8 -1,
   1011                           i8 -1, i8 -1, i8 -1, i8 -1,
   1012                           i8 -1, i8 -1, i8 -1, i8 -1>
   1013   %4 = and <16 x i8> %1, %3
   1014   %5 = and <16 x i8> <i8 6, i8 6, i8 6, i8 6,
   1015                       i8 6, i8 6, i8 6, i8 6,
   1016                       i8 6, i8 6, i8 6, i8 6,
   1017                       i8 6, i8 6, i8 6, i8 6>, %2
   1018   %6 = or <16 x i8> %4, %5
   1019   ; CHECK-DAG: bseli.b [[R3]], [[R1]], 6
   1020   store <16 x i8> %6, <16 x i8>* %c
   1021   ; CHECK-DAG: st.b [[R3]], 0($4)
   1022 
   1023   ret void
   1024   ; CHECK: .size bsel_v16i8_i
   1025 }
   1026 
   1027 define void @bsel_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Bitwise select with a constant mask: computes
          ;   (%a & 6) | (%b & 65529)
          ; and stores the result to %c. 65529 is 0xFFF9, the 16-bit complement
          ; of 6, so bits 1 and 2 come from %a and all other bits from %b.
          ; The checks below expect the mask splat to be built with ldi.h and
          ; then consumed (and overwritten) by bsel.v.
   1028   ; CHECK: bsel_v8i16:
   1029 
   1030   %1 = load <8 x i16>, <8 x i16>* %a
   1031   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1032   %2 = load <8 x i16>, <8 x i16>* %b
   1033   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1034   %3 = and <8 x i16> %1, <i16 6, i16 6, i16 6, i16 6,
   1035                           i16 6, i16 6, i16 6, i16 6>
   1036   %4 = and <8 x i16> %2, <i16 65529, i16 65529, i16 65529, i16 65529,
   1037                           i16 65529, i16 65529, i16 65529, i16 65529>
   1038   %5 = or <8 x i16> %3, %4
   1039   ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 6
   1040   ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
   1041   store <8 x i16> %5, <8 x i16>* %c
   1042   ; CHECK-DAG: st.h [[R3]], 0($4)
   1043 
   1044   ret void
   1045   ; CHECK: .size bsel_v8i16
   1046 }
   1047 
   1048 define void @bsel_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Bitwise select with a constant mask: computes
          ;   (%a & 6) | (%b & 4294967289)
          ; and stores the result to %c. 4294967289 is 0xFFFFFFF9, the 32-bit
          ; complement of 6, so bits 1 and 2 come from %a and the rest from %b.
          ; The checks below expect the mask splat to be built with ldi.w and
          ; then consumed (and overwritten) by bsel.v.
   1049   ; CHECK: bsel_v4i32:
   1050 
   1051   %1 = load <4 x i32>, <4 x i32>* %a
   1052   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1053   %2 = load <4 x i32>, <4 x i32>* %b
   1054   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1055   %3 = and <4 x i32> %1, <i32 6, i32 6, i32 6, i32 6>
   1056   %4 = and <4 x i32> %2, <i32 4294967289, i32 4294967289, i32 4294967289, i32 4294967289>
   1057   %5 = or <4 x i32> %3, %4
   1058   ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 6
   1059   ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
   1060   store <4 x i32> %5, <4 x i32>* %c
   1061   ; CHECK-DAG: st.w [[R3]], 0($4)
   1062 
   1063   ret void
   1064   ; CHECK: .size bsel_v4i32
   1065 }
   1066 
   1067 define void @bsel_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Bitwise select with a constant mask: computes
          ;   (%a & 6) | (%b & 18446744073709551609)
          ; and stores the result to %c. 18446744073709551609 is
          ; 0xFFFFFFFFFFFFFFF9, the 64-bit complement of 6, so bits 1 and 2 come
          ; from %a and the rest from %b.
          ; The checks below expect the mask splat to be built with ldi.d and
          ; then consumed (and overwritten) by bsel.v.
   1068   ; CHECK: bsel_v2i64:
   1069 
   1070   %1 = load <2 x i64>, <2 x i64>* %a
   1071   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1072   %2 = load <2 x i64>, <2 x i64>* %b
   1073   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1074   %3 = and <2 x i64> %1, <i64 6, i64 6>
   1075   %4 = and <2 x i64> %2, <i64 18446744073709551609, i64 18446744073709551609>
   1076   %5 = or <2 x i64> %3, %4
   1077   ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 6
   1078   ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
   1079   store <2 x i64> %5, <2 x i64>* %c
   1080   ; CHECK-DAG: st.d [[R3]], 0($4)
   1081 
   1082   ret void
   1083   ; CHECK: .size bsel_v2i64
   1084 }
   1085 
   1086 define void @binsl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Bit insert from the left with a constant mask: computes
          ;   (%a & 192) | (%b & 63)
          ; and stores the result to %c. 192 is 0xC0 and 63 is 0x3F, so the top
          ; two bits of each byte come from %a and the low six from %b.
          ; The checks below expect binsli.b writing into the register that
          ; holds %b's value.
          ; NOTE(review): the mask covers 2 bits and the expected immediate is
          ; also 2. The MSA manual appears to define binsli as inserting
          ; (m mod element-bits) + 1 bits -- confirm this is not off by one.
   1087   ; CHECK: binsl_v16i8_i:
   1088 
   1089   %1 = load <16 x i8>, <16 x i8>* %a
   1090   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1091   %2 = load <16 x i8>, <16 x i8>* %b
   1092   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1093   %3 = and <16 x i8> %1, <i8 192, i8 192, i8 192, i8 192,
   1094                           i8 192, i8 192, i8 192, i8 192,
   1095                           i8 192, i8 192, i8 192, i8 192,
   1096                           i8 192, i8 192, i8 192, i8 192>
   1097   %4 = and <16 x i8> %2, <i8 63, i8 63, i8 63, i8 63,
   1098                           i8 63, i8 63, i8 63, i8 63,
   1099                           i8 63, i8 63, i8 63, i8 63,
   1100                           i8 63, i8 63, i8 63, i8 63>
   1101   %5 = or <16 x i8> %3, %4
   1102   ; CHECK-DAG: binsli.b [[R2]], [[R1]], 2
   1103   store <16 x i8> %5, <16 x i8>* %c
   1104   ; CHECK-DAG: st.b [[R2]], 0($4)
   1105 
   1106   ret void
   1107   ; CHECK: .size binsl_v16i8_i
   1108 }
   1109 
   1110 define void @binsl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Bit insert from the left with a constant mask: computes
          ;   (%a & 49152) | (%b & 16383)
          ; and stores the result to %c. 49152 is 0xC000 and 16383 is 0x3FFF, so
          ; the top two bits of each halfword come from %a and the rest from %b.
          ; The checks below expect binsli.h writing into the register that
          ; holds %b's value.
          ; NOTE(review): the mask covers 2 bits and the expected immediate is
          ; also 2. The MSA manual appears to define binsli as inserting
          ; (m mod element-bits) + 1 bits -- confirm this is not off by one.
   1111   ; CHECK: binsl_v8i16_i:
   1112 
   1113   %1 = load <8 x i16>, <8 x i16>* %a
   1114   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1115   %2 = load <8 x i16>, <8 x i16>* %b
   1116   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1117   %3 = and <8 x i16> %1, <i16 49152, i16 49152, i16 49152, i16 49152,
   1118                           i16 49152, i16 49152, i16 49152, i16 49152>
   1119   %4 = and <8 x i16> %2, <i16 16383, i16 16383, i16 16383, i16 16383,
   1120                           i16 16383, i16 16383, i16 16383, i16 16383>
   1121   %5 = or <8 x i16> %3, %4
   1122   ; CHECK-DAG: binsli.h [[R2]], [[R1]], 2
   1123   store <8 x i16> %5, <8 x i16>* %c
   1124   ; CHECK-DAG: st.h [[R2]], 0($4)
   1125 
   1126   ret void
   1127   ; CHECK: .size binsl_v8i16_i
   1128 }
   1129 
   1130 define void @binsl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Bit insert from the left with a constant mask: computes
          ;   (%a & 3221225472) | (%b & 1073741823)
          ; and stores the result to %c. 3221225472 is 0xC0000000 and 1073741823
          ; is 0x3FFFFFFF, so the top two bits of each word come from %a and the
          ; rest from %b.
          ; The checks below expect binsli.w writing into the register that
          ; holds %b's value.
          ; NOTE(review): the mask covers 2 bits and the expected immediate is
          ; also 2. The MSA manual appears to define binsli as inserting
          ; (m mod element-bits) + 1 bits -- confirm this is not off by one.
   1131   ; CHECK: binsl_v4i32_i:
   1132 
   1133   %1 = load <4 x i32>, <4 x i32>* %a
   1134   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1135   %2 = load <4 x i32>, <4 x i32>* %b
   1136   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1137   %3 = and <4 x i32> %1, <i32 3221225472, i32 3221225472, i32 3221225472, i32 3221225472>
   1138   %4 = and <4 x i32> %2, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
   1139   %5 = or <4 x i32> %3, %4
   1140   ; CHECK-DAG: binsli.w [[R2]], [[R1]], 2
   1141   store <4 x i32> %5, <4 x i32>* %c
   1142   ; CHECK-DAG: st.w [[R2]], 0($4)
   1143 
   1144   ret void
   1145   ; CHECK: .size binsl_v4i32_i
   1146 }
   1147 
   1148 define void @binsl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Bit insert from the left with a constant mask: computes
          ;   (%a & 18446744073709551608) | (%b & 7)
          ; and stores the result to %c. 18446744073709551608 is
          ; 0xFFFFFFFFFFFFFFF8, so the top 61 bits of each doubleword come from
          ; %a and the low three from %b.
          ; The checks below expect binsli.d writing into the register that
          ; holds %b's value.
          ; NOTE(review): the mask covers 61 bits and the expected immediate is
          ; also 61. The MSA manual appears to define binsli as inserting
          ; (m mod element-bits) + 1 bits -- confirm this is not off by one.
   1149   ; CHECK: binsl_v2i64_i:
   1150 
   1151   %1 = load <2 x i64>, <2 x i64>* %a
   1152   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1153   %2 = load <2 x i64>, <2 x i64>* %b
   1154   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1155   %3 = and <2 x i64> %1, <i64 18446744073709551608, i64 18446744073709551608>
   1156   %4 = and <2 x i64> %2, <i64 7, i64 7>
   1157   %5 = or <2 x i64> %3, %4
   1158   ; TODO: We use a particularly wide mask here to work around a legalization
   1159   ;       issue. If the mask doesn't fit within a 10-bit immediate, it gets
   1160   ;       legalized into a constant pool. We should add a test to cover the
   1161   ;       other cases once they correctly select binsli.d.
   1162   ; CHECK-DAG: binsli.d [[R2]], [[R1]], 61
   1163   store <2 x i64> %5, <2 x i64>* %c
   1164   ; CHECK-DAG: st.d [[R2]], 0($4)
   1165 
   1166   ret void
   1167   ; CHECK: .size binsl_v2i64_i
   1168 }
   1169 
   1170 define void @binsr_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Bit insert from the right with a constant mask: computes
          ;   (%a & 3) | (%b & 252)
          ; and stores the result to %c. 252 is 0xFC, so the low two bits of each
          ; byte come from %a and the upper six from %b.
          ; The checks below expect binsri.b writing into the register that
          ; holds %b's value.
          ; NOTE(review): the mask covers 2 bits and the expected immediate is
          ; also 2. The MSA manual appears to define binsri as inserting
          ; (m mod element-bits) + 1 bits -- confirm this is not off by one.
   1171   ; CHECK: binsr_v16i8_i:
   1172 
   1173   %1 = load <16 x i8>, <16 x i8>* %a
   1174   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1175   %2 = load <16 x i8>, <16 x i8>* %b
   1176   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1177   %3 = and <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
   1178                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   1179   %4 = and <16 x i8> %2, <i8 252, i8 252, i8 252, i8 252,
   1180                           i8 252, i8 252, i8 252, i8 252,
   1181                           i8 252, i8 252, i8 252, i8 252,
   1182                           i8 252, i8 252, i8 252, i8 252>
   1183   %5 = or <16 x i8> %3, %4
   1184   ; CHECK-DAG: binsri.b [[R2]], [[R1]], 2
   1185   store <16 x i8> %5, <16 x i8>* %c
   1186   ; CHECK-DAG: st.b [[R2]], 0($4)
   1187 
   1188   ret void
   1189   ; CHECK: .size binsr_v16i8_i
   1190 }
   1191 
   1192 define void @binsr_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Bit insert from the right with a constant mask: computes
          ;   (%a & 3) | (%b & 65532)
          ; and stores the result to %c. 65532 is 0xFFFC, so the low two bits of
          ; each halfword come from %a and the rest from %b.
          ; The checks below expect binsri.h writing into the register that
          ; holds %b's value.
          ; NOTE(review): the mask covers 2 bits and the expected immediate is
          ; also 2. The MSA manual appears to define binsri as inserting
          ; (m mod element-bits) + 1 bits -- confirm this is not off by one.
   1193   ; CHECK: binsr_v8i16_i:
   1194 
   1195   %1 = load <8 x i16>, <8 x i16>* %a
   1196   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1197   %2 = load <8 x i16>, <8 x i16>* %b
   1198   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1199   %3 = and <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3,
   1200                           i16 3, i16 3, i16 3, i16 3>
   1201   %4 = and <8 x i16> %2, <i16 65532, i16 65532, i16 65532, i16 65532,
   1202                           i16 65532, i16 65532, i16 65532, i16 65532>
   1203   %5 = or <8 x i16> %3, %4
   1204   ; CHECK-DAG: binsri.h [[R2]], [[R1]], 2
   1205   store <8 x i16> %5, <8 x i16>* %c
   1206   ; CHECK-DAG: st.h [[R2]], 0($4)
   1207 
   1208   ret void
   1209   ; CHECK: .size binsr_v8i16_i
   1210 }
   1211 
   1212 define void @binsr_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Bit insert from the right with a constant mask: computes
          ;   (%a & 3) | (%b & 4294967292)
          ; and stores the result to %c. 4294967292 is 0xFFFFFFFC, so the low two
          ; bits of each word come from %a and the rest from %b.
          ; The checks below expect binsri.w writing into the register that
          ; holds %b's value.
          ; NOTE(review): the mask covers 2 bits and the expected immediate is
          ; also 2. The MSA manual appears to define binsri as inserting
          ; (m mod element-bits) + 1 bits -- confirm this is not off by one.
   1213   ; CHECK: binsr_v4i32_i:
   1214 
   1215   %1 = load <4 x i32>, <4 x i32>* %a
   1216   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1217   %2 = load <4 x i32>, <4 x i32>* %b
   1218   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1219   %3 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
   1220   %4 = and <4 x i32> %2, <i32 4294967292, i32 4294967292, i32 4294967292, i32 4294967292>
   1221   %5 = or <4 x i32> %3, %4
   1222   ; CHECK-DAG: binsri.w [[R2]], [[R1]], 2
   1223   store <4 x i32> %5, <4 x i32>* %c
   1224   ; CHECK-DAG: st.w [[R2]], 0($4)
   1225 
   1226   ret void
   1227   ; CHECK: .size binsr_v4i32_i
   1228 }
   1229 
   1230 define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Bit insert from the right with a constant mask: computes
          ;   (%a & 3) | (%b & 18446744073709551612)
          ; and stores the result to %c. 18446744073709551612 is
          ; 0xFFFFFFFFFFFFFFFC, so the low two bits of each doubleword come from
          ; %a and the rest from %b.
          ; The checks below expect binsri.d writing into the register that
          ; holds %b's value.
          ; NOTE(review): the mask covers 2 bits and the expected immediate is
          ; also 2. The MSA manual appears to define binsri as inserting
          ; (m mod element-bits) + 1 bits -- confirm this is not off by one.
   1231   ; CHECK: binsr_v2i64_i:
   1232 
   1233   %1 = load <2 x i64>, <2 x i64>* %a
   1234   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1235   %2 = load <2 x i64>, <2 x i64>* %b
   1236   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1237   %3 = and <2 x i64> %1, <i64 3, i64 3>
   1238   %4 = and <2 x i64> %2, <i64 18446744073709551612, i64 18446744073709551612>
   1239   %5 = or <2 x i64> %3, %4
   1240   ; CHECK-DAG: binsri.d [[R2]], [[R1]], 2
   1241   store <2 x i64> %5, <2 x i64>* %c
   1242   ; CHECK-DAG: st.d [[R2]], 0($4)
   1243 
   1244   ret void
   1245   ; CHECK: .size binsr_v2i64_i
   1246 }
   1247 
   1248 define void @bclr_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Variable bit clear: computes %a & ~(1 << %b) lane by lane and stores
          ; the result to %c. The shl/xor/and sequence below is the pattern the
          ; checks expect to be selected as bclr.b.
   1249   ; CHECK: bclr_v16i8:
   1250 
   1251   %1 = load <16 x i8>, <16 x i8>* %a
   1252   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1253   %2 = load <16 x i8>, <16 x i8>* %b
   1254   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1255   %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
   1256   %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   1257   %5 = and <16 x i8> %1, %4
   1258   ; CHECK-DAG: bclr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1259   store <16 x i8> %5, <16 x i8>* %c
   1260   ; CHECK-DAG: st.b [[R3]], 0($4)
   1261 
   1262   ret void
   1263   ; CHECK: .size bclr_v16i8
   1264 }
   1265 
   1266 define void @bclr_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Variable bit clear: computes %a & ~(1 << %b) lane by lane and stores
          ; the result to %c. The shl/xor/and sequence below is the pattern the
          ; checks expect to be selected as bclr.h.
   1267   ; CHECK: bclr_v8i16:
   1268 
   1269   %1 = load <8 x i16>, <8 x i16>* %a
   1270   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1271   %2 = load <8 x i16>, <8 x i16>* %b
   1272   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1273   %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
   1274   %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
   1275   %5 = and <8 x i16> %1, %4
   1276   ; CHECK-DAG: bclr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1277   store <8 x i16> %5, <8 x i16>* %c
   1278   ; CHECK-DAG: st.h [[R3]], 0($4)
   1279 
   1280   ret void
   1281   ; CHECK: .size bclr_v8i16
   1282 }
   1283 
   1284 define void @bclr_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Variable bit clear: computes %a & ~(1 << %b) lane by lane and stores
          ; the result to %c. The shl/xor/and sequence below is the pattern the
          ; checks expect to be selected as bclr.w.
   1285   ; CHECK: bclr_v4i32:
   1286 
   1287   %1 = load <4 x i32>, <4 x i32>* %a
   1288   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1289   %2 = load <4 x i32>, <4 x i32>* %b
   1290   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1291   %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
   1292   %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
   1293   %5 = and <4 x i32> %1, %4
   1294   ; CHECK-DAG: bclr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1295   store <4 x i32> %5, <4 x i32>* %c
   1296   ; CHECK-DAG: st.w [[R3]], 0($4)
   1297 
   1298   ret void
   1299   ; CHECK: .size bclr_v4i32
   1300 }
   1301 
   1302 define void @bclr_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Variable bit clear: computes %a & ~(1 << %b) lane by lane and stores
          ; the result to %c. The shl/xor/and sequence below is the pattern the
          ; checks expect to be selected as bclr.d.
   1303   ; CHECK: bclr_v2i64:
   1304 
   1305   %1 = load <2 x i64>, <2 x i64>* %a
   1306   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1307   %2 = load <2 x i64>, <2 x i64>* %b
   1308   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1309   %3 = shl <2 x i64> <i64 1, i64 1>, %2
   1310   %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
   1311   %5 = and <2 x i64> %1, %4
   1312   ; CHECK-DAG: bclr.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1313   store <2 x i64> %5, <2 x i64>* %c
   1314   ; CHECK-DAG: st.d [[R3]], 0($4)
   1315 
   1316   ret void
   1317   ; CHECK: .size bclr_v2i64
   1318 }
   1319 
   1320 define void @bset_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Variable bit set: computes %a | (1 << %b) lane by lane and stores the
          ; result to %c. The shl/or sequence below is the pattern the checks
          ; expect to be selected as bset.b.
   1321   ; CHECK: bset_v16i8:
   1322 
   1323   %1 = load <16 x i8>, <16 x i8>* %a
   1324   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1325   %2 = load <16 x i8>, <16 x i8>* %b
   1326   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1327   %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
   1328   %4 = or <16 x i8> %1, %3
   1329   ; CHECK-DAG: bset.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1330   store <16 x i8> %4, <16 x i8>* %c
   1331   ; CHECK-DAG: st.b [[R3]], 0($4)
   1332 
   1333   ret void
   1334   ; CHECK: .size bset_v16i8
   1335 }
   1336 
   1337 define void @bset_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Variable bit set: computes %a | (1 << %b) lane by lane and stores the
          ; result to %c. The shl/or sequence below is the pattern the checks
          ; expect to be selected as bset.h.
   1338   ; CHECK: bset_v8i16:
   1339 
   1340   %1 = load <8 x i16>, <8 x i16>* %a
   1341   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1342   %2 = load <8 x i16>, <8 x i16>* %b
   1343   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1344   %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
   1345   %4 = or <8 x i16> %1, %3
   1346   ; CHECK-DAG: bset.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1347   store <8 x i16> %4, <8 x i16>* %c
   1348   ; CHECK-DAG: st.h [[R3]], 0($4)
   1349 
   1350   ret void
   1351   ; CHECK: .size bset_v8i16
   1352 }
   1353 
   1354 define void @bset_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Variable bit set: computes %a | (1 << %b) lane by lane and stores the
          ; result to %c. The shl/or sequence below is the pattern the checks
          ; expect to be selected as bset.w.
   1355   ; CHECK: bset_v4i32:
   1356 
   1357   %1 = load <4 x i32>, <4 x i32>* %a
   1358   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1359   %2 = load <4 x i32>, <4 x i32>* %b
   1360   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1361   %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
   1362   %4 = or <4 x i32> %1, %3
   1363   ; CHECK-DAG: bset.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1364   store <4 x i32> %4, <4 x i32>* %c
   1365   ; CHECK-DAG: st.w [[R3]], 0($4)
   1366 
   1367   ret void
   1368   ; CHECK: .size bset_v4i32
   1369 }
   1370 
   1371 define void @bset_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Variable bit set: computes %a | (1 << %b) lane by lane and stores the
          ; result to %c. The shl/or sequence below is the pattern the checks
          ; expect to be selected as bset.d.
   1372   ; CHECK: bset_v2i64:
   1373 
   1374   %1 = load <2 x i64>, <2 x i64>* %a
   1375   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1376   %2 = load <2 x i64>, <2 x i64>* %b
   1377   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1378   %3 = shl <2 x i64> <i64 1, i64 1>, %2
   1379   %4 = or <2 x i64> %1, %3
   1380   ; CHECK-DAG: bset.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1381   store <2 x i64> %4, <2 x i64>* %c
   1382   ; CHECK-DAG: st.d [[R3]], 0($4)
   1383 
   1384   ret void
   1385   ; CHECK: .size bset_v2i64
   1386 }
   1387 
   1388 define void @bneg_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Variable bit toggle: computes %a ^ (1 << %b) lane by lane and stores
          ; the result to %c. The shl/xor sequence below is the pattern the
          ; checks expect to be selected as bneg.b.
   1389   ; CHECK: bneg_v16i8:
   1390 
   1391   %1 = load <16 x i8>, <16 x i8>* %a
   1392   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1393   %2 = load <16 x i8>, <16 x i8>* %b
   1394   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1395   %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
   1396   %4 = xor <16 x i8> %1, %3
   1397   ; CHECK-DAG: bneg.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1398   store <16 x i8> %4, <16 x i8>* %c
   1399   ; CHECK-DAG: st.b [[R3]], 0($4)
   1400 
   1401   ret void
   1402   ; CHECK: .size bneg_v16i8
   1403 }
   1404 
   1405 define void @bneg_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Variable bit toggle: computes %a ^ (1 << %b) lane by lane and stores
          ; the result to %c. The shl/xor sequence below is the pattern the
          ; checks expect to be selected as bneg.h.
   1406   ; CHECK: bneg_v8i16:
   1407 
   1408   %1 = load <8 x i16>, <8 x i16>* %a
   1409   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1410   %2 = load <8 x i16>, <8 x i16>* %b
   1411   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1412   %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
   1413   %4 = xor <8 x i16> %1, %3
   1414   ; CHECK-DAG: bneg.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1415   store <8 x i16> %4, <8 x i16>* %c
   1416   ; CHECK-DAG: st.h [[R3]], 0($4)
   1417 
   1418   ret void
   1419   ; CHECK: .size bneg_v8i16
   1420 }
   1421 
   1422 define void @bneg_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Variable bit toggle: computes %a ^ (1 << %b) lane by lane and stores
          ; the result to %c. The shl/xor sequence below is the pattern the
          ; checks expect to be selected as bneg.w.
   1423   ; CHECK: bneg_v4i32:
   1424 
   1425   %1 = load <4 x i32>, <4 x i32>* %a
   1426   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1427   %2 = load <4 x i32>, <4 x i32>* %b
   1428   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1429   %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
   1430   %4 = xor <4 x i32> %1, %3
   1431   ; CHECK-DAG: bneg.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1432   store <4 x i32> %4, <4 x i32>* %c
   1433   ; CHECK-DAG: st.w [[R3]], 0($4)
   1434 
   1435   ret void
   1436   ; CHECK: .size bneg_v4i32
   1437 }
   1438 
   1439 define void @bneg_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Variable bit toggle: computes %a ^ (1 << %b) lane by lane and stores
          ; the result to %c. The shl/xor sequence below is the pattern the
          ; checks expect to be selected as bneg.d.
   1440   ; CHECK: bneg_v2i64:
   1441 
   1442   %1 = load <2 x i64>, <2 x i64>* %a
   1443   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1444   %2 = load <2 x i64>, <2 x i64>* %b
   1445   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1446   %3 = shl <2 x i64> <i64 1, i64 1>, %2
   1447   %4 = xor <2 x i64> %1, %3
   1448   ; CHECK-DAG: bneg.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1449   store <2 x i64> %4, <2 x i64>* %c
   1450   ; CHECK-DAG: st.d [[R3]], 0($4)
   1451 
   1452   ret void
   1453   ; CHECK: .size bneg_v2i64
   1454 }
   1455 
   1456 define void @bclri_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Constant bit clear: computes %a & ~8 in every byte lane (8 = 1 << 3,
          ; written as the xor of a splat of 8 with all-ones) and stores the
          ; result to %c. For bytes ~8 is 247, and the checks below expect the
          ; operation to come out as andi.b with that immediate.
   1457   ; CHECK: bclri_v16i8:
   1458 
   1459   %1 = load <16 x i8>, <16 x i8>* %a
   1460   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1461   %2 = xor <16 x i8> <i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8>,
   1462                      <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   1463   %3 = and <16 x i8> %1, %2
   1464   ; bclri.b and andi.b are exactly equivalent.
   1465   ; CHECK-DAG: andi.b [[R3:\$w[0-9]+]], [[R1]], 247
   1466   store <16 x i8> %3, <16 x i8>* %c
   1467   ; CHECK-DAG: st.b [[R3]], 0($4)
   1468 
   1469   ret void
   1470   ; CHECK: .size bclri_v16i8
   1471 }
   1472 
   1473 define void @bclri_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Constant bit clear: computes %a & ~8 in every halfword lane (8 is
          ; 1 << 3, written as the xor of a splat of 8 with all-ones) and stores
          ; the result to %c. The checks below expect bclri.h with bit index 3.
   1474   ; CHECK: bclri_v8i16:
   1475 
   1476   %1 = load <8 x i16>, <8 x i16>* %a
   1477   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1478   %2 = xor <8 x i16> <i16  8, i16  8, i16  8, i16  8, i16  8, i16  8, i16  8, i16  8>,
   1479                      <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
   1480   %3 = and <8 x i16> %1, %2
   1481   ; CHECK-DAG: bclri.h [[R3:\$w[0-9]+]], [[R1]], 3
   1482   store <8 x i16> %3, <8 x i16>* %c
   1483   ; CHECK-DAG: st.h [[R3]], 0($4)
   1484 
   1485   ret void
   1486   ; CHECK: .size bclri_v8i16
   1487 }
   1488 
   1489 define void @bclri_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Constant bit clear: computes %a & ~8 in every word lane (8 is 1 << 3,
          ; written as the xor of a splat of 8 with all-ones) and stores the
          ; result to %c. The checks below expect bclri.w with bit index 3.
   1490   ; CHECK: bclri_v4i32:
   1491 
   1492   %1 = load <4 x i32>, <4 x i32>* %a
   1493   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1494   %2 = xor <4 x i32> <i32  8, i32  8, i32  8, i32  8>,
   1495                      <i32 -1, i32 -1, i32 -1, i32 -1>
   1496   %3 = and <4 x i32> %1, %2
   1497   ; CHECK-DAG: bclri.w [[R3:\$w[0-9]+]], [[R1]], 3
   1498   store <4 x i32> %3, <4 x i32>* %c
   1499   ; CHECK-DAG: st.w [[R3]], 0($4)
   1500 
   1501   ret void
   1502   ; CHECK: .size bclri_v4i32
   1503 }
   1504 
   1505 define void @bclri_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Constant bit clear: computes %a & ~8 in both doubleword lanes (8 is
          ; 1 << 3, written as the xor of a splat of 8 with all-ones) and stores
          ; the result to %c. The checks below expect bclri.d with bit index 3.
   1506   ; CHECK: bclri_v2i64:
   1507 
   1508   %1 = load <2 x i64>, <2 x i64>* %a
   1509   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1510   %2 = xor <2 x i64> <i64  8, i64  8>,
   1511                      <i64 -1, i64 -1>
   1512   %3 = and <2 x i64> %1, %2
   1513   ; CHECK-DAG: bclri.d [[R3:\$w[0-9]+]], [[R1]], 3
   1514   store <2 x i64> %3, <2 x i64>* %c
   1515   ; CHECK-DAG: st.d [[R3]], 0($4)
   1516 
   1517   ret void
   1518   ; CHECK: .size bclri_v2i64
   1519 }
   1520 
   1521 define void @bseti_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Constant bit set: computes %a | 8 in every byte lane (8 = 1 << 3) and
          ; stores the result to %c. The checks below expect bseti.b with bit
          ; index 3.
   1522   ; CHECK: bseti_v16i8:
   1523 
   1524   %1 = load <16 x i8>, <16 x i8>* %a
   1525   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1526   %2 = or <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
   1527   ; CHECK-DAG: bseti.b [[R3:\$w[0-9]+]], [[R1]], 3
   1528   store <16 x i8> %2, <16 x i8>* %c
   1529   ; CHECK-DAG: st.b [[R3]], 0($4)
   1530 
   1531   ret void
   1532   ; CHECK: .size bseti_v16i8
   1533 }
   1534 
   1535 define void @bseti_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Constant bit set: computes %a | 8 in every halfword lane (8 = 1 << 3)
          ; and stores the result to %c. The checks below expect bseti.h with
          ; bit index 3.
   1536   ; CHECK: bseti_v8i16:
   1537 
   1538   %1 = load <8 x i16>, <8 x i16>* %a
   1539   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1540   %2 = or <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   1541   ; CHECK-DAG: bseti.h [[R3:\$w[0-9]+]], [[R1]], 3
   1542   store <8 x i16> %2, <8 x i16>* %c
   1543   ; CHECK-DAG: st.h [[R3]], 0($4)
   1544 
   1545   ret void
   1546   ; CHECK: .size bseti_v8i16
   1547 }
   1548 
   1549 define void @bseti_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Constant bit set: computes %a | 8 in every word lane (8 = 1 << 3) and
          ; stores the result to %c. The checks below expect bseti.w with bit
          ; index 3.
   1550   ; CHECK: bseti_v4i32:
   1551 
   1552   %1 = load <4 x i32>, <4 x i32>* %a
   1553   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1554   %2 = or <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
   1555   ; CHECK-DAG: bseti.w [[R3:\$w[0-9]+]], [[R1]], 3
   1556   store <4 x i32> %2, <4 x i32>* %c
   1557   ; CHECK-DAG: st.w [[R3]], 0($4)
   1558 
   1559   ret void
   1560   ; CHECK: .size bseti_v4i32
   1561 }
   1562 
   1563 define void @bseti_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Constant bit set: computes %a | 8 in both doubleword lanes
          ; (8 = 1 << 3) and stores the result to %c. The checks below expect
          ; bseti.d with bit index 3.
   1564   ; CHECK: bseti_v2i64:
   1565 
   1566   %1 = load <2 x i64>, <2 x i64>* %a
   1567   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1568   %2 = or <2 x i64> %1, <i64 8, i64 8>
   1569   ; CHECK-DAG: bseti.d [[R3:\$w[0-9]+]], [[R1]], 3
   1570   store <2 x i64> %2, <2 x i64>* %c
   1571   ; CHECK-DAG: st.d [[R3]], 0($4)
   1572 
   1573   ret void
   1574   ; CHECK: .size bseti_v2i64
   1575 }
   1576 
   1577 define void @bnegi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Constant bit toggle: computes %a ^ 8 in every byte lane (8 = 1 << 3)
          ; and stores the result to %c. The checks below expect bnegi.b with
          ; bit index 3.
   1578   ; CHECK: bnegi_v16i8:
   1579 
   1580   %1 = load <16 x i8>, <16 x i8>* %a
   1581   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1582   %2 = xor <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
   1583   ; CHECK-DAG: bnegi.b [[R3:\$w[0-9]+]], [[R1]], 3
   1584   store <16 x i8> %2, <16 x i8>* %c
   1585   ; CHECK-DAG: st.b [[R3]], 0($4)
   1586 
   1587   ret void
   1588   ; CHECK: .size bnegi_v16i8
   1589 }
   1590 
   1591 define void @bnegi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Constant bit toggle: computes %a ^ 8 in every halfword lane
          ; (8 = 1 << 3) and stores the result to %c. The checks below expect
          ; bnegi.h with bit index 3.
   1592   ; CHECK: bnegi_v8i16:
   1593 
   1594   %1 = load <8 x i16>, <8 x i16>* %a
   1595   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1596   %2 = xor <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   1597   ; CHECK-DAG: bnegi.h [[R3:\$w[0-9]+]], [[R1]], 3
   1598   store <8 x i16> %2, <8 x i16>* %c
   1599   ; CHECK-DAG: st.h [[R3]], 0($4)
   1600 
   1601   ret void
   1602   ; CHECK: .size bnegi_v8i16
   1603 }
   1604 
   1605 define void @bnegi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Constant bit toggle: computes %a ^ 8 in every word lane (8 = 1 << 3)
          ; and stores the result to %c. The checks below expect bnegi.w with
          ; bit index 3.
   1606   ; CHECK: bnegi_v4i32:
   1607 
   1608   %1 = load <4 x i32>, <4 x i32>* %a
   1609   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1610   %2 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
   1611   ; CHECK-DAG: bnegi.w [[R3:\$w[0-9]+]], [[R1]], 3
   1612   store <4 x i32> %2, <4 x i32>* %c
   1613   ; CHECK-DAG: st.w [[R3]], 0($4)
   1614 
   1615   ret void
   1616   ; CHECK: .size bnegi_v4i32
   1617 }
   1618 
   1619 define void @bnegi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Constant bit toggle: computes %a ^ 8 in both doubleword lanes
          ; (8 = 1 << 3) and stores the result to %c. The checks below expect
          ; bnegi.d with bit index 3.
   1620   ; CHECK: bnegi_v2i64:
   1621 
   1622   %1 = load <2 x i64>, <2 x i64>* %a
   1623   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1624   %2 = xor <2 x i64> %1, <i64 8, i64 8>
   1625   ; CHECK-DAG: bnegi.d [[R3:\$w[0-9]+]], [[R1]], 3
   1626   store <2 x i64> %2, <2 x i64>* %c
   1627   ; CHECK-DAG: st.d [[R3]], 0($4)
   1628 
   1629   ret void
   1630   ; CHECK: .size bnegi_v2i64
   1631 }
   1632 
        ; Declarations of the vector intrinsics called by the ctpop_* and ctlz_*
        ; test functions in this file, one per element width.
        ; NOTE(review): llvm.ctlz is declared with a single operand here;
        ; presumably this relies on the IR reader accepting/upgrading the
        ; one-operand form -- confirm before porting to a newer LLVM.
   1633 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %val)
   1634 declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val)
   1635 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val)
   1636 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val)
   1637 declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %val)
   1638 declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %val)
   1639 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val)
   1640 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %val)
   1641