Home | History | Annotate | Download | only in msa
      1 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
      2 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
      3 
      4 define void @ceq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Lane-wise 'icmp eq' of the <16 x i8> vectors at %a and %b, sign-extended and
          ; stored to %c; the compare is expected to be selected as a single ceq.b.
      5   ; CHECK: ceq_v16i8:
      6 
      7   %1 = load <16 x i8>, <16 x i8>* %a
      8   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
      9   %2 = load <16 x i8>, <16 x i8>* %b
     10   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
     11   %3 = icmp eq <16 x i8> %1, %2
     12   %4 = sext <16 x i1> %3 to <16 x i8>
     13   ; CHECK-DAG: ceq.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
     14   store <16 x i8> %4, <16 x i8>* %c
     15   ; CHECK-DAG: st.b [[R3]], 0($4)
     16 
     17   ret void
     18   ; CHECK: .size ceq_v16i8
     19 }
     20 
     21 define void @ceq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Lane-wise 'icmp eq' of the <8 x i16> vectors at %a and %b, sign-extended and
          ; stored to %c; the compare is expected to be selected as a single ceq.h.
     22   ; CHECK: ceq_v8i16:
     23 
     24   %1 = load <8 x i16>, <8 x i16>* %a
     25   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
     26   %2 = load <8 x i16>, <8 x i16>* %b
     27   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
     28   %3 = icmp eq <8 x i16> %1, %2
     29   %4 = sext <8 x i1> %3 to <8 x i16>
     30   ; CHECK-DAG: ceq.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
     31   store <8 x i16> %4, <8 x i16>* %c
     32   ; CHECK-DAG: st.h [[R3]], 0($4)
     33 
     34   ret void
     35   ; CHECK: .size ceq_v8i16
     36 }
     37 
     38 define void @ceq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Lane-wise 'icmp eq' of the <4 x i32> vectors at %a and %b, sign-extended and
          ; stored to %c; the compare is expected to be selected as a single ceq.w.
     39   ; CHECK: ceq_v4i32:
     40 
     41   %1 = load <4 x i32>, <4 x i32>* %a
     42   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
     43   %2 = load <4 x i32>, <4 x i32>* %b
     44   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
     45   %3 = icmp eq <4 x i32> %1, %2
     46   %4 = sext <4 x i1> %3 to <4 x i32>
     47   ; CHECK-DAG: ceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
     48   store <4 x i32> %4, <4 x i32>* %c
     49   ; CHECK-DAG: st.w [[R3]], 0($4)
     50 
     51   ret void
     52   ; CHECK: .size ceq_v4i32
     53 }
     54 
     55 define void @ceq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Lane-wise 'icmp eq' of the <2 x i64> vectors at %a and %b, sign-extended and
          ; stored to %c; the compare is expected to be selected as a single ceq.d.
     56   ; CHECK: ceq_v2i64:
     57 
     58   %1 = load <2 x i64>, <2 x i64>* %a
     59   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
     60   %2 = load <2 x i64>, <2 x i64>* %b
     61   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
     62   %3 = icmp eq <2 x i64> %1, %2
     63   %4 = sext <2 x i1> %3 to <2 x i64>
     64   ; CHECK-DAG: ceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
     65   store <2 x i64> %4, <2 x i64>* %c
     66   ; CHECK-DAG: st.d [[R3]], 0($4)
     67 
     68   ret void
     69   ; CHECK: .size ceq_v2i64
     70 }
     71 
     72 define void @cle_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Lane-wise signed 'icmp sle' of the <16 x i8> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single cle_s.b.
     73   ; CHECK: cle_s_v16i8:
     74 
     75   %1 = load <16 x i8>, <16 x i8>* %a
     76   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
     77   %2 = load <16 x i8>, <16 x i8>* %b
     78   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
     79   %3 = icmp sle <16 x i8> %1, %2
     80   %4 = sext <16 x i1> %3 to <16 x i8>
     81   ; CHECK-DAG: cle_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
     82   store <16 x i8> %4, <16 x i8>* %c
     83   ; CHECK-DAG: st.b [[R3]], 0($4)
     84 
     85   ret void
     86   ; CHECK: .size cle_s_v16i8
     87 }
     88 
     89 define void @cle_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Lane-wise signed 'icmp sle' of the <8 x i16> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single cle_s.h.
     90   ; CHECK: cle_s_v8i16:
     91 
     92   %1 = load <8 x i16>, <8 x i16>* %a
     93   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
     94   %2 = load <8 x i16>, <8 x i16>* %b
     95   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
     96   %3 = icmp sle <8 x i16> %1, %2
     97   %4 = sext <8 x i1> %3 to <8 x i16>
     98   ; CHECK-DAG: cle_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
     99   store <8 x i16> %4, <8 x i16>* %c
    100   ; CHECK-DAG: st.h [[R3]], 0($4)
    101 
    102   ret void
    103   ; CHECK: .size cle_s_v8i16
    104 }
    105 
    106 define void @cle_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Lane-wise signed 'icmp sle' of the <4 x i32> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single cle_s.w.
    107   ; CHECK: cle_s_v4i32:
    108 
    109   %1 = load <4 x i32>, <4 x i32>* %a
    110   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    111   %2 = load <4 x i32>, <4 x i32>* %b
    112   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    113   %3 = icmp sle <4 x i32> %1, %2
    114   %4 = sext <4 x i1> %3 to <4 x i32>
    115   ; CHECK-DAG: cle_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    116   store <4 x i32> %4, <4 x i32>* %c
    117   ; CHECK-DAG: st.w [[R3]], 0($4)
    118 
    119   ret void
    120   ; CHECK: .size cle_s_v4i32
    121 }
    122 
    123 define void @cle_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Lane-wise signed 'icmp sle' of the <2 x i64> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single cle_s.d.
    124   ; CHECK: cle_s_v2i64:
    125 
    126   %1 = load <2 x i64>, <2 x i64>* %a
    127   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    128   %2 = load <2 x i64>, <2 x i64>* %b
    129   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    130   %3 = icmp sle <2 x i64> %1, %2
    131   %4 = sext <2 x i1> %3 to <2 x i64>
    132   ; CHECK-DAG: cle_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    133   store <2 x i64> %4, <2 x i64>* %c
    134   ; CHECK-DAG: st.d [[R3]], 0($4)
    135 
    136   ret void
    137   ; CHECK: .size cle_s_v2i64
    138 }
    139 
    140 define void @cle_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Lane-wise unsigned 'icmp ule' of the <16 x i8> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single cle_u.b.
    141   ; CHECK: cle_u_v16i8:
    142 
    143   %1 = load <16 x i8>, <16 x i8>* %a
    144   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    145   %2 = load <16 x i8>, <16 x i8>* %b
    146   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    147   %3 = icmp ule <16 x i8> %1, %2
    148   %4 = sext <16 x i1> %3 to <16 x i8>
    149   ; CHECK-DAG: cle_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    150   store <16 x i8> %4, <16 x i8>* %c
    151   ; CHECK-DAG: st.b [[R3]], 0($4)
    152 
    153   ret void
    154   ; CHECK: .size cle_u_v16i8
    155 }
    156 
    157 define void @cle_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Lane-wise unsigned 'icmp ule' of the <8 x i16> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single cle_u.h.
    158   ; CHECK: cle_u_v8i16:
    159 
    160   %1 = load <8 x i16>, <8 x i16>* %a
    161   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    162   %2 = load <8 x i16>, <8 x i16>* %b
    163   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    164   %3 = icmp ule <8 x i16> %1, %2
    165   %4 = sext <8 x i1> %3 to <8 x i16>
    166   ; CHECK-DAG: cle_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    167   store <8 x i16> %4, <8 x i16>* %c
    168   ; CHECK-DAG: st.h [[R3]], 0($4)
    169 
    170   ret void
    171   ; CHECK: .size cle_u_v8i16
    172 }
    173 
    174 define void @cle_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Lane-wise unsigned 'icmp ule' of the <4 x i32> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single cle_u.w.
    175   ; CHECK: cle_u_v4i32:
    176 
    177   %1 = load <4 x i32>, <4 x i32>* %a
    178   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    179   %2 = load <4 x i32>, <4 x i32>* %b
    180   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    181   %3 = icmp ule <4 x i32> %1, %2
    182   %4 = sext <4 x i1> %3 to <4 x i32>
    183   ; CHECK-DAG: cle_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    184   store <4 x i32> %4, <4 x i32>* %c
    185   ; CHECK-DAG: st.w [[R3]], 0($4)
    186 
    187   ret void
    188   ; CHECK: .size cle_u_v4i32
    189 }
    190 
    191 define void @cle_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Lane-wise unsigned 'icmp ule' of the <2 x i64> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single cle_u.d.
    192   ; CHECK: cle_u_v2i64:
    193 
    194   %1 = load <2 x i64>, <2 x i64>* %a
    195   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    196   %2 = load <2 x i64>, <2 x i64>* %b
    197   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    198   %3 = icmp ule <2 x i64> %1, %2
    199   %4 = sext <2 x i1> %3 to <2 x i64>
    200   ; CHECK-DAG: cle_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    201   store <2 x i64> %4, <2 x i64>* %c
    202   ; CHECK-DAG: st.d [[R3]], 0($4)
    203 
    204   ret void
    205   ; CHECK: .size cle_u_v2i64
    206 }
    207 
    208 define void @clt_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Lane-wise signed 'icmp slt' of the <16 x i8> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single clt_s.b.
    209   ; CHECK: clt_s_v16i8:
    210 
    211   %1 = load <16 x i8>, <16 x i8>* %a
    212   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    213   %2 = load <16 x i8>, <16 x i8>* %b
    214   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    215   %3 = icmp slt <16 x i8> %1, %2
    216   %4 = sext <16 x i1> %3 to <16 x i8>
    217   ; CHECK-DAG: clt_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    218   store <16 x i8> %4, <16 x i8>* %c
    219   ; CHECK-DAG: st.b [[R3]], 0($4)
    220 
    221   ret void
    222   ; CHECK: .size clt_s_v16i8
    223 }
    224 
    225 define void @clt_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Lane-wise signed 'icmp slt' of the <8 x i16> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single clt_s.h.
    226   ; CHECK: clt_s_v8i16:
    227 
    228   %1 = load <8 x i16>, <8 x i16>* %a
    229   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    230   %2 = load <8 x i16>, <8 x i16>* %b
    231   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    232   %3 = icmp slt <8 x i16> %1, %2
    233   %4 = sext <8 x i1> %3 to <8 x i16>
    234   ; CHECK-DAG: clt_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    235   store <8 x i16> %4, <8 x i16>* %c
    236   ; CHECK-DAG: st.h [[R3]], 0($4)
    237 
    238   ret void
    239   ; CHECK: .size clt_s_v8i16
    240 }
    241 
    242 define void @clt_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Lane-wise signed 'icmp slt' of the <4 x i32> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single clt_s.w.
    243   ; CHECK: clt_s_v4i32:
    244 
    245   %1 = load <4 x i32>, <4 x i32>* %a
    246   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    247   %2 = load <4 x i32>, <4 x i32>* %b
    248   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    249   %3 = icmp slt <4 x i32> %1, %2
    250   %4 = sext <4 x i1> %3 to <4 x i32>
    251   ; CHECK-DAG: clt_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    252   store <4 x i32> %4, <4 x i32>* %c
    253   ; CHECK-DAG: st.w [[R3]], 0($4)
    254 
    255   ret void
    256   ; CHECK: .size clt_s_v4i32
    257 }
    258 
    259 define void @clt_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Lane-wise signed 'icmp slt' of the <2 x i64> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single clt_s.d.
    260   ; CHECK: clt_s_v2i64:
    261 
    262   %1 = load <2 x i64>, <2 x i64>* %a
    263   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    264   %2 = load <2 x i64>, <2 x i64>* %b
    265   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    266   %3 = icmp slt <2 x i64> %1, %2
    267   %4 = sext <2 x i1> %3 to <2 x i64>
    268   ; CHECK-DAG: clt_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    269   store <2 x i64> %4, <2 x i64>* %c
    270   ; CHECK-DAG: st.d [[R3]], 0($4)
    271 
    272   ret void
    273   ; CHECK: .size clt_s_v2i64
    274 }
    275 
    276 define void @clt_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Lane-wise unsigned 'icmp ult' of the <16 x i8> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single clt_u.b.
    277   ; CHECK: clt_u_v16i8:
    278 
    279   %1 = load <16 x i8>, <16 x i8>* %a
    280   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    281   %2 = load <16 x i8>, <16 x i8>* %b
    282   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    283   %3 = icmp ult <16 x i8> %1, %2
    284   %4 = sext <16 x i1> %3 to <16 x i8>
    285   ; CHECK-DAG: clt_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    286   store <16 x i8> %4, <16 x i8>* %c
    287   ; CHECK-DAG: st.b [[R3]], 0($4)
    288 
    289   ret void
    290   ; CHECK: .size clt_u_v16i8
    291 }
    292 
    293 define void @clt_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Lane-wise unsigned 'icmp ult' of the <8 x i16> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single clt_u.h.
    294   ; CHECK: clt_u_v8i16:
    295 
    296   %1 = load <8 x i16>, <8 x i16>* %a
    297   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    298   %2 = load <8 x i16>, <8 x i16>* %b
    299   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    300   %3 = icmp ult <8 x i16> %1, %2
    301   %4 = sext <8 x i1> %3 to <8 x i16>
    302   ; CHECK-DAG: clt_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    303   store <8 x i16> %4, <8 x i16>* %c
    304   ; CHECK-DAG: st.h [[R3]], 0($4)
    305 
    306   ret void
    307   ; CHECK: .size clt_u_v8i16
    308 }
    309 
    310 define void @clt_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Lane-wise unsigned 'icmp ult' of the <4 x i32> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single clt_u.w.
    311   ; CHECK: clt_u_v4i32:
    312 
    313   %1 = load <4 x i32>, <4 x i32>* %a
    314   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    315   %2 = load <4 x i32>, <4 x i32>* %b
    316   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    317   %3 = icmp ult <4 x i32> %1, %2
    318   %4 = sext <4 x i1> %3 to <4 x i32>
    319   ; CHECK-DAG: clt_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    320   store <4 x i32> %4, <4 x i32>* %c
    321   ; CHECK-DAG: st.w [[R3]], 0($4)
    322 
    323   ret void
    324   ; CHECK: .size clt_u_v4i32
    325 }
    326 
    327 define void @clt_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Lane-wise unsigned 'icmp ult' of the <2 x i64> vectors at %a and %b, sign-extended
          ; and stored to %c; the compare is expected to be selected as a single clt_u.d.
    328   ; CHECK: clt_u_v2i64:
    329 
    330   %1 = load <2 x i64>, <2 x i64>* %a
    331   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    332   %2 = load <2 x i64>, <2 x i64>* %b
    333   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    334   %3 = icmp ult <2 x i64> %1, %2
    335   %4 = sext <2 x i1> %3 to <2 x i64>
    336   ; CHECK-DAG: clt_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    337   store <2 x i64> %4, <2 x i64>* %c
    338   ; CHECK-DAG: st.d [[R3]], 0($4)
    339 
    340   ret void
    341   ; CHECK: .size clt_u_v2i64
    342 }
    343 
    344 ; There is no != comparison, but test it anyway since we've had legalizer
    345 ; issues in this area.
    346 define void @cne_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Lane-wise 'icmp ne' of the <16 x i8> vectors at %a and %b, sign-extended and
          ; stored to %c; expected as ceq.b whose result is then inverted by xori.b 255.
    347   ; CHECK: cne_v16i8:
    348   %1 = load <16 x i8>, <16 x i8>* %a
    349   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    350   %2 = load <16 x i8>, <16 x i8>* %b
    351   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    352   %3 = icmp ne <16 x i8> %1, %2
    353   %4 = sext <16 x i1> %3 to <16 x i8>
    354   ; CHECK-DAG: ceq.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    355   ; CHECK-DAG: xori.b [[R3]], [[R3]], 255
    356   store <16 x i8> %4, <16 x i8>* %c
    357   ; CHECK-DAG: st.b [[R3]], 0($4)
    358 
    359   ret void
    360   ; CHECK: .size cne_v16i8
    361 }
    362 
    363 ; There is no != comparison, but test it anyway since we've had legalizer
    364 ; issues in this area.
    365 define void @cne_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Lane-wise 'icmp ne' of the <8 x i16> vectors at %a and %b, sign-extended and
          ; stored to %c; expected as ceq.h inverted by xor.v with an ldi.b -1 vector.
    366   ; CHECK: cne_v8i16:
    367 
    368   %1 = load <8 x i16>, <8 x i16>* %a
    369   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    370   %2 = load <8 x i16>, <8 x i16>* %b
    371   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    372   %3 = icmp ne <8 x i16> %1, %2
    373   %4 = sext <8 x i1> %3 to <8 x i16>
    374   ; CHECK-DAG: ceq.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    375   ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but that's an optimisation issue
    376   ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1
    377   ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]]
    378   store <8 x i16> %4, <8 x i16>* %c
    379   ; CHECK-DAG: st.h [[R3]], 0($4)
    380 
    381   ret void
    382   ; CHECK: .size cne_v8i16
    383 }
    384 
    385 ; There is no != comparison, but test it anyway since we've had legalizer
    386 ; issues in this area.
    387 define void @cne_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Lane-wise 'icmp ne' of the <4 x i32> vectors at %a and %b, sign-extended and
          ; stored to %c; expected as ceq.w inverted by xor.v with an ldi.b -1 vector.
    388   ; CHECK: cne_v4i32:
    389 
    390   %1 = load <4 x i32>, <4 x i32>* %a
    391   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    392   %2 = load <4 x i32>, <4 x i32>* %b
    393   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    394   %3 = icmp ne <4 x i32> %1, %2
    395   %4 = sext <4 x i1> %3 to <4 x i32>
    396   ; CHECK-DAG: ceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    397   ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but that's an optimisation issue
    398   ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1
    399   ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]]
    400   store <4 x i32> %4, <4 x i32>* %c
    401   ; CHECK-DAG: st.w [[R3]], 0($4)
    402 
    403   ret void
    404   ; CHECK: .size cne_v4i32
    405 }
    406 
    407 ; There is no != comparison, but test it anyway since we've had legalizer
    408 ; issues in this area.
    409 define void @cne_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Lane-wise 'icmp ne' of the <2 x i64> vectors at %a and %b, sign-extended and
          ; stored to %c; expected as ceq.d inverted by xor.v with an ldi.b -1 vector.
    410   ; CHECK: cne_v2i64:
    411 
    412   %1 = load <2 x i64>, <2 x i64>* %a
    413   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    414   %2 = load <2 x i64>, <2 x i64>* %b
    415   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    416   %3 = icmp ne <2 x i64> %1, %2
    417   %4 = sext <2 x i1> %3 to <2 x i64>
    418   ; CHECK-DAG: ceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    419   ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but that's an optimisation issue
    420   ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1
    421   ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]]
    422   store <2 x i64> %4, <2 x i64>* %c
    423   ; CHECK-DAG: st.d [[R3]], 0($4)
    424 
    425   ret void
    426   ; CHECK: .size cne_v2i64
    427 }
    428 
    429 define void @ceqi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Lane-wise 'icmp eq' of the <16 x i8> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form ceqi.b.
    430   ; CHECK: ceqi_v16i8:
    431 
    432   %1 = load <16 x i8>, <16 x i8>* %a
    433   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    434   %2 = icmp eq <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    435   %3 = sext <16 x i1> %2 to <16 x i8>
    436   ; CHECK-DAG: ceqi.b [[R3:\$w[0-9]+]], [[R1]], 1
    437   store <16 x i8> %3, <16 x i8>* %c
    438   ; CHECK-DAG: st.b [[R3]], 0($4)
    439 
    440   ret void
    441   ; CHECK: .size ceqi_v16i8
    442 }
    443 
    444 define void @ceqi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Lane-wise 'icmp eq' of the <8 x i16> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form ceqi.h.
    445   ; CHECK: ceqi_v8i16:
    446 
    447   %1 = load <8 x i16>, <8 x i16>* %a
    448   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    449   %2 = icmp eq <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    450   %3 = sext <8 x i1> %2 to <8 x i16>
    451   ; CHECK-DAG: ceqi.h [[R3:\$w[0-9]+]], [[R1]], 1
    452   store <8 x i16> %3, <8 x i16>* %c
    453   ; CHECK-DAG: st.h [[R3]], 0($4)
    454 
    455   ret void
    456   ; CHECK: .size ceqi_v8i16
    457 }
    458 
    459 define void @ceqi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Lane-wise 'icmp eq' of the <4 x i32> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form ceqi.w.
    460   ; CHECK: ceqi_v4i32:
    461 
    462   %1 = load <4 x i32>, <4 x i32>* %a
    463   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    464   %2 = icmp eq <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
    465   %3 = sext <4 x i1> %2 to <4 x i32>
    466   ; CHECK-DAG: ceqi.w [[R3:\$w[0-9]+]], [[R1]], 1
    467   store <4 x i32> %3, <4 x i32>* %c
    468   ; CHECK-DAG: st.w [[R3]], 0($4)
    469 
    470   ret void
    471   ; CHECK: .size ceqi_v4i32
    472 }
    473 
    474 define void @ceqi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Lane-wise 'icmp eq' of the <2 x i64> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form ceqi.d.
    475   ; CHECK: ceqi_v2i64:
    476 
    477   %1 = load <2 x i64>, <2 x i64>* %a
    478   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    479   %2 = icmp eq <2 x i64> %1, <i64 1, i64 1>
    480   %3 = sext <2 x i1> %2 to <2 x i64>
    481   ; CHECK-DAG: ceqi.d [[R3:\$w[0-9]+]], [[R1]], 1
    482   store <2 x i64> %3, <2 x i64>* %c
    483   ; CHECK-DAG: st.d [[R3]], 0($4)
    484 
    485   ret void
    486   ; CHECK: .size ceqi_v2i64
    487 }
    488 
    489 define void @clei_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Lane-wise signed 'icmp sle' of the <16 x i8> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clei_s.b.
    490   ; CHECK: clei_s_v16i8:
    491 
    492   %1 = load <16 x i8>, <16 x i8>* %a
    493   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    494   %2 = icmp sle <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    495   %3 = sext <16 x i1> %2 to <16 x i8>
    496   ; CHECK-DAG: clei_s.b [[R3:\$w[0-9]+]], [[R1]], 1
    497   store <16 x i8> %3, <16 x i8>* %c
    498   ; CHECK-DAG: st.b [[R3]], 0($4)
    499 
    500   ret void
    501   ; CHECK: .size clei_s_v16i8
    502 }
    503 
    504 define void @clei_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Lane-wise signed 'icmp sle' of the <8 x i16> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clei_s.h.
    505   ; CHECK: clei_s_v8i16:
    506 
    507   %1 = load <8 x i16>, <8 x i16>* %a
    508   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    509   %2 = icmp sle <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    510   %3 = sext <8 x i1> %2 to <8 x i16>
    511   ; CHECK-DAG: clei_s.h [[R3:\$w[0-9]+]], [[R1]], 1
    512   store <8 x i16> %3, <8 x i16>* %c
    513   ; CHECK-DAG: st.h [[R3]], 0($4)
    514 
    515   ret void
    516   ; CHECK: .size clei_s_v8i16
    517 }
    518 
    519 define void @clei_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Lane-wise signed 'icmp sle' of the <4 x i32> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clei_s.w.
    520   ; CHECK: clei_s_v4i32:
    521 
    522   %1 = load <4 x i32>, <4 x i32>* %a
    523   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    524   %2 = icmp sle <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
    525   %3 = sext <4 x i1> %2 to <4 x i32>
    526   ; CHECK-DAG: clei_s.w [[R3:\$w[0-9]+]], [[R1]], 1
    527   store <4 x i32> %3, <4 x i32>* %c
    528   ; CHECK-DAG: st.w [[R3]], 0($4)
    529 
    530   ret void
    531   ; CHECK: .size clei_s_v4i32
    532 }
    533 
    534 define void @clei_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Lane-wise signed 'icmp sle' of the <2 x i64> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clei_s.d.
    535   ; CHECK: clei_s_v2i64:
    536 
    537   %1 = load <2 x i64>, <2 x i64>* %a
    538   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    539   %2 = icmp sle <2 x i64> %1, <i64 1, i64 1>
    540   %3 = sext <2 x i1> %2 to <2 x i64>
    541   ; CHECK-DAG: clei_s.d [[R3:\$w[0-9]+]], [[R1]], 1
    542   store <2 x i64> %3, <2 x i64>* %c
    543   ; CHECK-DAG: st.d [[R3]], 0($4)
    544 
    545   ret void
    546   ; CHECK: .size clei_s_v2i64
    547 }
    548 
    549 define void @clei_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Lane-wise unsigned 'icmp ule' of the <16 x i8> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clei_u.b.
    550   ; CHECK: clei_u_v16i8:
    551 
    552   %1 = load <16 x i8>, <16 x i8>* %a
    553   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    554   %2 = icmp ule <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    555   %3 = sext <16 x i1> %2 to <16 x i8>
    556   ; CHECK-DAG: clei_u.b [[R3:\$w[0-9]+]], [[R1]], 1
    557   store <16 x i8> %3, <16 x i8>* %c
    558   ; CHECK-DAG: st.b [[R3]], 0($4)
    559 
    560   ret void
    561   ; CHECK: .size clei_u_v16i8
    562 }
    563 
    564 define void @clei_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Lane-wise unsigned 'icmp ule' of the <8 x i16> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clei_u.h.
    565   ; CHECK: clei_u_v8i16:
    566 
    567   %1 = load <8 x i16>, <8 x i16>* %a
    568   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    569   %2 = icmp ule <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    570   %3 = sext <8 x i1> %2 to <8 x i16>
    571   ; CHECK-DAG: clei_u.h [[R3:\$w[0-9]+]], [[R1]], 1
    572   store <8 x i16> %3, <8 x i16>* %c
    573   ; CHECK-DAG: st.h [[R3]], 0($4)
    574 
    575   ret void
    576   ; CHECK: .size clei_u_v8i16
    577 }
    578 
    579 define void @clei_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Lane-wise unsigned 'icmp ule' of the <4 x i32> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clei_u.w.
    580   ; CHECK: clei_u_v4i32:
    581 
    582   %1 = load <4 x i32>, <4 x i32>* %a
    583   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    584   %2 = icmp ule <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
    585   %3 = sext <4 x i1> %2 to <4 x i32>
    586   ; CHECK-DAG: clei_u.w [[R3:\$w[0-9]+]], [[R1]], 1
    587   store <4 x i32> %3, <4 x i32>* %c
    588   ; CHECK-DAG: st.w [[R3]], 0($4)
    589 
    590   ret void
    591   ; CHECK: .size clei_u_v4i32
    592 }
    593 
    594 define void @clei_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Lane-wise unsigned 'icmp ule' of the <2 x i64> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clei_u.d.
    595   ; CHECK: clei_u_v2i64:
    596 
    597   %1 = load <2 x i64>, <2 x i64>* %a
    598   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    599   %2 = icmp ule <2 x i64> %1, <i64 1, i64 1>
    600   %3 = sext <2 x i1> %2 to <2 x i64>
    601   ; CHECK-DAG: clei_u.d [[R3:\$w[0-9]+]], [[R1]], 1
    602   store <2 x i64> %3, <2 x i64>* %c
    603   ; CHECK-DAG: st.d [[R3]], 0($4)
    604 
    605   ret void
    606   ; CHECK: .size clei_u_v2i64
    607 }
    608 
    609 define void @clti_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Lane-wise signed 'icmp slt' of the <16 x i8> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clti_s.b.
    610   ; CHECK: clti_s_v16i8:
    611 
    612   %1 = load <16 x i8>, <16 x i8>* %a
    613   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    614   %2 = icmp slt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    615   %3 = sext <16 x i1> %2 to <16 x i8>
    616   ; CHECK-DAG: clti_s.b [[R3:\$w[0-9]+]], [[R1]], 1
    617   store <16 x i8> %3, <16 x i8>* %c
    618   ; CHECK-DAG: st.b [[R3]], 0($4)
    619 
    620   ret void
    621   ; CHECK: .size clti_s_v16i8
    622 }
    623 
    624 define void @clti_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Lane-wise signed 'icmp slt' of the <8 x i16> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clti_s.h.
    625   ; CHECK: clti_s_v8i16:
    626 
    627   %1 = load <8 x i16>, <8 x i16>* %a
    628   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    629   %2 = icmp slt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    630   %3 = sext <8 x i1> %2 to <8 x i16>
    631   ; CHECK-DAG: clti_s.h [[R3:\$w[0-9]+]], [[R1]], 1
    632   store <8 x i16> %3, <8 x i16>* %c
    633   ; CHECK-DAG: st.h [[R3]], 0($4)
    634 
    635   ret void
    636   ; CHECK: .size clti_s_v8i16
    637 }
    638 
    639 define void @clti_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Lane-wise signed 'icmp slt' of the <4 x i32> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clti_s.w.
    640   ; CHECK: clti_s_v4i32:
    641 
    642   %1 = load <4 x i32>, <4 x i32>* %a
    643   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    644   %2 = icmp slt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
    645   %3 = sext <4 x i1> %2 to <4 x i32>
    646   ; CHECK-DAG: clti_s.w [[R3:\$w[0-9]+]], [[R1]], 1
    647   store <4 x i32> %3, <4 x i32>* %c
    648   ; CHECK-DAG: st.w [[R3]], 0($4)
    649 
    650   ret void
    651   ; CHECK: .size clti_s_v4i32
    652 }
    653 
    654 define void @clti_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Lane-wise signed 'icmp slt' of the <2 x i64> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clti_s.d.
    655   ; CHECK: clti_s_v2i64:
    656 
    657   %1 = load <2 x i64>, <2 x i64>* %a
    658   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    659   %2 = icmp slt <2 x i64> %1, <i64 1, i64 1>
    660   %3 = sext <2 x i1> %2 to <2 x i64>
    661   ; CHECK-DAG: clti_s.d [[R3:\$w[0-9]+]], [[R1]], 1
    662   store <2 x i64> %3, <2 x i64>* %c
    663   ; CHECK-DAG: st.d [[R3]], 0($4)
    664 
    665   ret void
    666   ; CHECK: .size clti_s_v2i64
    667 }
    668 
    669 define void @clti_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Lane-wise unsigned 'icmp ult' of the <16 x i8> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clti_u.b.
    670   ; CHECK: clti_u_v16i8:
    671 
    672   %1 = load <16 x i8>, <16 x i8>* %a
    673   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    674   %2 = icmp ult <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    675   %3 = sext <16 x i1> %2 to <16 x i8>
    676   ; CHECK-DAG: clti_u.b [[R3:\$w[0-9]+]], [[R1]], 1
    677   store <16 x i8> %3, <16 x i8>* %c
    678   ; CHECK-DAG: st.b [[R3]], 0($4)
    679 
    680   ret void
    681   ; CHECK: .size clti_u_v16i8
    682 }
    683 
    684 define void @clti_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Lane-wise unsigned 'icmp ult' of the <8 x i16> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clti_u.h.
    685   ; CHECK: clti_u_v8i16:
    686 
    687   %1 = load <8 x i16>, <8 x i16>* %a
    688   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    689   %2 = icmp ult <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    690   %3 = sext <8 x i1> %2 to <8 x i16>
    691   ; CHECK-DAG: clti_u.h [[R3:\$w[0-9]+]], [[R1]], 1
    692   store <8 x i16> %3, <8 x i16>* %c
    693   ; CHECK-DAG: st.h [[R3]], 0($4)
    694 
    695   ret void
    696   ; CHECK: .size clti_u_v8i16
    697 }
    698 
    699 define void @clti_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Lane-wise unsigned 'icmp ult' of the <4 x i32> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clti_u.w.
    700   ; CHECK: clti_u_v4i32:
    701 
    702   %1 = load <4 x i32>, <4 x i32>* %a
    703   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    704   %2 = icmp ult <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
    705   %3 = sext <4 x i1> %2 to <4 x i32>
    706   ; CHECK-DAG: clti_u.w [[R3:\$w[0-9]+]], [[R1]], 1
    707   store <4 x i32> %3, <4 x i32>* %c
    708   ; CHECK-DAG: st.w [[R3]], 0($4)
    709 
    710   ret void
    711   ; CHECK: .size clti_u_v4i32
    712 }
    713 
    714 define void @clti_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Lane-wise unsigned 'icmp ult' of the <2 x i64> vector at %a against a splat of 1,
          ; sign-extended and stored to %c; expected to use the immediate form clti_u.d.
    715   ; CHECK: clti_u_v2i64:
    716 
    717   %1 = load <2 x i64>, <2 x i64>* %a
    718   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    719   %2 = icmp ult <2 x i64> %1, <i64 1, i64 1>
    720   %3 = sext <2 x i1> %2 to <2 x i64>
    721   ; CHECK-DAG: clti_u.d [[R3:\$w[0-9]+]], [[R1]], 1
    722   store <2 x i64> %3, <2 x i64>* %c
    723   ; CHECK-DAG: st.d [[R3]], 0($4)
    724 
    725   ret void
    726   ; CHECK: .size clti_u_v2i64
    727 }
    728 
    729 define void @bsel_s_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
    730                         <16 x i8>* %c) nounwind {
          ; Per lane, picks the <16 x i8> value at %a where it is signed-greater than the
          ; one at %b and the value at %c otherwise, then stores the result to %d. The
          ; 'icmp sgt' is expected as clt_s.b with swapped operands, and the select as
          ; bmnz.v updating the register that was loaded from %c.
    731   ; CHECK: bsel_s_v16i8:
    732 
    733   %1 = load <16 x i8>, <16 x i8>* %a
    734   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    735   %2 = load <16 x i8>, <16 x i8>* %b
    736   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    737   %3 = load <16 x i8>, <16 x i8>* %c
    738   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
    739   %4 = icmp sgt <16 x i8> %1, %2
    740   ; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
    741   %5 = select <16 x i1> %4, <16 x i8> %1, <16 x i8> %3
    742   ; bmnz.v is the same operation
    743   ; CHECK-DAG: bmnz.v [[R3]], [[R1]], [[R4]]
    744   store <16 x i8> %5, <16 x i8>* %d
    745   ; CHECK-DAG: st.b [[R3]], 0($4)
    746 
    747   ret void
    748   ; CHECK: .size bsel_s_v16i8
    749 }
    750 
    751 define void @bsel_s_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
    752                         <8 x i16>* %c) nounwind {
          ; Per lane, picks the <8 x i16> value at %a where it is signed-greater than the
          ; one at %b and the value at %c otherwise, then stores the result to %d. The
          ; 'icmp sgt' is expected as clt_s.h with swapped operands, and the select as
          ; bsel.v writing its result over the compare-mask register.
    753   ; CHECK: bsel_s_v8i16:
    754 
    755   %1 = load <8 x i16>, <8 x i16>* %a
    756   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    757   %2 = load <8 x i16>, <8 x i16>* %b
    758   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    759   %3 = load <8 x i16>, <8 x i16>* %c
    760   ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
    761   %4 = icmp sgt <8 x i16> %1, %2
    762   ; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
    763   %5 = select <8 x i1> %4, <8 x i16> %1, <8 x i16> %3
    764   ; Note that IfSet and IfClr are swapped since the condition is inverted
    765   ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]]
    766   store <8 x i16> %5, <8 x i16>* %d
    767   ; CHECK-DAG: st.h [[R4]], 0($4)
    768 
    769   ret void
    770   ; CHECK: .size bsel_s_v8i16
    771 }
    772 
    773 define void @bsel_s_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
    774                         <4 x i32>* %c) nounwind {
          ; Per lane, picks the <4 x i32> value at %a where it is signed-greater than the
          ; one at %b and the value at %c otherwise, then stores the result to %d. The
          ; 'icmp sgt' is expected as clt_s.w with swapped operands, and the select as
          ; bsel.v writing its result over the compare-mask register.
    775   ; CHECK: bsel_s_v4i32:
    776 
    777   %1 = load <4 x i32>, <4 x i32>* %a
    778   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    779   %2 = load <4 x i32>, <4 x i32>* %b
    780   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    781   %3 = load <4 x i32>, <4 x i32>* %c
    782   ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
    783   %4 = icmp sgt <4 x i32> %1, %2
    784   ; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
    785   %5 = select <4 x i1> %4, <4 x i32> %1, <4 x i32> %3
    786   ; Note that IfSet and IfClr are swapped since the condition is inverted
    787   ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]]
    788   store <4 x i32> %5, <4 x i32>* %d
    789   ; CHECK-DAG: st.w [[R4]], 0($4)
    790 
    791   ret void
    792   ; CHECK: .size bsel_s_v4i32
    793 }
    794 
    795 define void @bsel_s_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
    796                         <2 x i64>* %c) nounwind {
          ; Per lane, picks the <2 x i64> value at %a where it is signed-greater than the
          ; one at %b and the value at %c otherwise, then stores the result to %d. The
          ; 'icmp sgt' is expected as clt_s.d with swapped operands, and the select as
          ; bsel.v writing its result over the compare-mask register.
    797   ; CHECK: bsel_s_v2i64:
    798 
    799   %1 = load <2 x i64>, <2 x i64>* %a
    800   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    801   %2 = load <2 x i64>, <2 x i64>* %b
    802   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    803   %3 = load <2 x i64>, <2 x i64>* %c
    804   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
    805   %4 = icmp sgt <2 x i64> %1, %2
    806   ; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
    807   %5 = select <2 x i1> %4, <2 x i64> %1, <2 x i64> %3
    808   ; Note that IfSet and IfClr are swapped since the condition is inverted
    809   ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]]
    810   store <2 x i64> %5, <2 x i64>* %d
    811   ; CHECK-DAG: st.d [[R4]], 0($4)
    812 
    813   ret void
    814   ; CHECK: .size bsel_s_v2i64
    815 }
    816 
    817 define void @bsel_u_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
    818                         <16 x i8>* %c) nounwind {
          ; Per lane, picks the <16 x i8> value at %a where it is unsigned-greater than the
          ; one at %b and the value at %c otherwise, then stores the result to %d. The
          ; 'icmp ugt' is expected as clt_u.b with swapped operands, and the select as
          ; bmnz.v updating the register that was loaded from %c.
    819   ; CHECK: bsel_u_v16i8:
    820 
    821   %1 = load <16 x i8>, <16 x i8>* %a
    822   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    823   %2 = load <16 x i8>, <16 x i8>* %b
    824   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    825   %3 = load <16 x i8>, <16 x i8>* %c
    826   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
    827   %4 = icmp ugt <16 x i8> %1, %2
    828   ; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
    829   %5 = select <16 x i1> %4, <16 x i8> %1, <16 x i8> %3
    830   ; bmnz.v is the same operation
    831   ; CHECK-DAG: bmnz.v [[R3]], [[R1]], [[R4]]
    832   store <16 x i8> %5, <16 x i8>* %d
    833   ; CHECK-DAG: st.b [[R3]], 0($4)
    834 
    835   ret void
    836   ; CHECK: .size bsel_u_v16i8
    837 }
    838 
    839 define void @bsel_u_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
    840                         <8 x i16>* %c) nounwind {
          ; Per element, stores %a to %d where %a > %b (unsigned compare) and %c
          ; elsewhere. Expected code: clt_u.h with the compare operands reversed,
          ; then bsel.v taking the compare result first, followed by the false
          ; value (loaded from %c) and the true value (loaded from %a).
    841   ; CHECK: bsel_u_v8i16:
    842 
    843   %1 = load <8 x i16>, <8 x i16>* %a
    844   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    845   %2 = load <8 x i16>, <8 x i16>* %b
    846   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    847   %3 = load <8 x i16>, <8 x i16>* %c
    848   ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
    849   %4 = icmp ugt <8 x i16> %1, %2
    850   ; CHECK-DAG: clt_u.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
    851   %5 = select <8 x i1> %4, <8 x i16> %1, <8 x i16> %3
    852   ; Note that IfSet and IfClr are swapped since the condition is inverted
    853   ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]]
    854   store <8 x i16> %5, <8 x i16>* %d
          ; The stored register is the one that held the compare result.
    855   ; CHECK-DAG: st.h [[R4]], 0($4)
    856 
    857   ret void
    858   ; CHECK: .size bsel_u_v8i16
    859 }
    860 
    861 define void @bsel_u_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
    862                         <4 x i32>* %c) nounwind {
          ; Per element, stores %a to %d where %a > %b (unsigned compare) and %c
          ; elsewhere. Expected code: clt_u.w with the compare operands reversed,
          ; then bsel.v taking the compare result first, followed by the false
          ; value (loaded from %c) and the true value (loaded from %a).
    863   ; CHECK: bsel_u_v4i32:
    864 
    865   %1 = load <4 x i32>, <4 x i32>* %a
    866   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    867   %2 = load <4 x i32>, <4 x i32>* %b
    868   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    869   %3 = load <4 x i32>, <4 x i32>* %c
    870   ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
    871   %4 = icmp ugt <4 x i32> %1, %2
    872   ; CHECK-DAG: clt_u.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
    873   %5 = select <4 x i1> %4, <4 x i32> %1, <4 x i32> %3
    874   ; Note that IfSet and IfClr are swapped since the condition is inverted
    875   ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]]
    876   store <4 x i32> %5, <4 x i32>* %d
          ; The stored register is the one that held the compare result.
    877   ; CHECK-DAG: st.w [[R4]], 0($4)
    878 
    879   ret void
    880   ; CHECK: .size bsel_u_v4i32
    881 }
    882 
    883 define void @bsel_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
    884                         <2 x i64>* %c) nounwind {
          ; Per element, stores %a to %d where %a > %b (unsigned compare) and %c
          ; elsewhere. Expected code: clt_u.d with the compare operands reversed,
          ; then bsel.v taking the compare result first, followed by the false
          ; value (loaded from %c) and the true value (loaded from %a).
    885   ; CHECK: bsel_u_v2i64:
    886 
    887   %1 = load <2 x i64>, <2 x i64>* %a
    888   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    889   %2 = load <2 x i64>, <2 x i64>* %b
    890   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    891   %3 = load <2 x i64>, <2 x i64>* %c
    892   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
    893   %4 = icmp ugt <2 x i64> %1, %2
    894   ; CHECK-DAG: clt_u.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
    895   %5 = select <2 x i1> %4, <2 x i64> %1, <2 x i64> %3
    896   ; Note that IfSet and IfClr are swapped since the condition is inverted
    897   ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]]
    898   store <2 x i64> %5, <2 x i64>* %d
          ; The stored register is the one that held the compare result.
    899   ; CHECK-DAG: st.d [[R4]], 0($4)
    900 
    901   ret void
    902   ; CHECK: .size bsel_u_v2i64
    903 }
    904 
    905 define void @bseli_s_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
    906                         <16 x i8>* %c) nounwind {
          ; Per element, stores the constant 1 to %d where %a > %b (signed compare)
          ; and %a elsewhere. Expected code: clt_s.b with the compare operands
          ; reversed, then bseli.b with immediate 1 using the compare result as
          ; the mask. %c is declared but never used in the body.
    907   ; CHECK: bseli_s_v16i8:
    908 
    909   %1 = load <16 x i8>, <16 x i8>* %a
    910   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    911   %2 = load <16 x i8>, <16 x i8>* %b
    912   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    913   %3 = icmp sgt <16 x i8> %1, %2
    914   ; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
    915   %4 = select <16 x i1> %3, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> %1
    916   ; CHECK-DAG: bseli.b [[R4]], [[R1]], 1
    917   store <16 x i8> %4, <16 x i8>* %d
    918   ; CHECK-DAG: st.b [[R4]], 0($4)
    919 
    920   ret void
    921   ; CHECK: .size bseli_s_v16i8
    922 }
    923 
    924 define void @bseli_s_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
    925                         <8 x i16>* %c) nounwind {
          ; Per element, stores the constant 1 to %d where %a > %b (signed compare)
          ; and %a elsewhere. Expected code: clt_s.h with the compare operands
          ; reversed, ldi.h to build the splat of 1, then the register-form
          ; bsel.v. %c is declared but never used in the body.
    926   ; CHECK: bseli_s_v8i16:
    927 
    928   %1 = load <8 x i16>, <8 x i16>* %a
    929   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    930   %2 = load <8 x i16>, <8 x i16>* %b
    931   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    932   %3 = icmp sgt <8 x i16> %1, %2
    933   ; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
    934   %4 = select <8 x i1> %3, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %1
    935   ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
    936   ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
    937   store <8 x i16> %4, <8 x i16>* %d
    938   ; CHECK-DAG: st.h [[R4]], 0($4)
    939 
    940   ret void
    941   ; CHECK: .size bseli_s_v8i16
    942 }
    943 
    944 define void @bseli_s_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
    945                         <4 x i32>* %c) nounwind {
          ; Per element, stores the constant 1 to %d where %a > %b (signed compare)
          ; and %a elsewhere. Expected code: clt_s.w with the compare operands
          ; reversed, ldi.w to build the splat of 1, then the register-form
          ; bsel.v. %c is declared but never used in the body.
    946   ; CHECK: bseli_s_v4i32:
    947 
    948   %1 = load <4 x i32>, <4 x i32>* %a
    949   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    950   %2 = load <4 x i32>, <4 x i32>* %b
    951   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    952   %3 = icmp sgt <4 x i32> %1, %2
    953   ; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
    954   %4 = select <4 x i1> %3, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> %1
    955   ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
    956   ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
    957   store <4 x i32> %4, <4 x i32>* %d
    958   ; CHECK-DAG: st.w [[R4]], 0($4)
    959 
    960   ret void
    961   ; CHECK: .size bseli_s_v4i32
    962 }
    963 
    964 define void @bseli_s_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
    965                         <2 x i64>* %c) nounwind {
          ; Per element, stores the constant 1 to %d where %a > %b (signed compare)
          ; and %a elsewhere. Expected code: clt_s.d with the compare operands
          ; reversed, ldi.d to build the splat of 1, then the register-form
          ; bsel.v. %c is declared but never used in the body.
    966   ; CHECK: bseli_s_v2i64:
    967 
    968   %1 = load <2 x i64>, <2 x i64>* %a
    969   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    970   %2 = load <2 x i64>, <2 x i64>* %b
    971   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    972   %3 = icmp sgt <2 x i64> %1, %2
    973   ; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
    974   %4 = select <2 x i1> %3, <2 x i64> <i64 1, i64 1>, <2 x i64> %1
    975   ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
    976   ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
    977   store <2 x i64> %4, <2 x i64>* %d
    978   ; CHECK-DAG: st.d [[R4]], 0($4)
    979 
    980   ret void
    981   ; CHECK: .size bseli_s_v2i64
    982 }
    983 
    984 define void @bseli_u_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
    985                         <16 x i8>* %c) nounwind {
          ; Per element, stores the constant 1 to %d where %a > %b (unsigned
          ; compare) and %a elsewhere. Expected code: clt_u.b with the compare
          ; operands reversed, then bseli.b with immediate 1 using the compare
          ; result as the mask. %c is declared but never used in the body.
    986   ; CHECK: bseli_u_v16i8:
    987 
    988   %1 = load <16 x i8>, <16 x i8>* %a
    989   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    990   %2 = load <16 x i8>, <16 x i8>* %b
    991   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    992   %3 = icmp ugt <16 x i8> %1, %2
    993   ; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
    994   %4 = select <16 x i1> %3, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> %1
    995   ; CHECK-DAG: bseli.b [[R4]], [[R1]], 1
    996   store <16 x i8> %4, <16 x i8>* %d
    997   ; CHECK-DAG: st.b [[R4]], 0($4)
    998 
    999   ret void
   1000   ; CHECK: .size bseli_u_v16i8
   1001 }
   1002 
   1003 define void @bseli_u_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
   1004                         <8 x i16>* %c) nounwind {
          ; Per element, stores the constant 1 to %d where %a > %b (unsigned
          ; compare) and %a elsewhere. Expected code: clt_u.h with the compare
          ; operands reversed, ldi.h to build the splat of 1, then the
          ; register-form bsel.v. %c is declared but never used in the body.
   1005   ; CHECK: bseli_u_v8i16:
   1006 
   1007   %1 = load <8 x i16>, <8 x i16>* %a
   1008   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1009   %2 = load <8 x i16>, <8 x i16>* %b
   1010   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1011   %3 = icmp ugt <8 x i16> %1, %2
   1012   ; CHECK-DAG: clt_u.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
   1013   %4 = select <8 x i1> %3, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %1
   1014   ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
   1015   ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
   1016   store <8 x i16> %4, <8 x i16>* %d
   1017   ; CHECK-DAG: st.h [[R4]], 0($4)
   1018 
   1019   ret void
   1020   ; CHECK: .size bseli_u_v8i16
   1021 }
   1022 
   1023 define void @bseli_u_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
   1024                         <4 x i32>* %c) nounwind {
          ; Per element, stores the constant 1 to %d where %a > %b (unsigned
          ; compare) and %a elsewhere. Expected code: clt_u.w with the compare
          ; operands reversed, ldi.w to build the splat of 1, then the
          ; register-form bsel.v. %c is declared but never used in the body.
   1025   ; CHECK: bseli_u_v4i32:
   1026 
   1027   %1 = load <4 x i32>, <4 x i32>* %a
   1028   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1029   %2 = load <4 x i32>, <4 x i32>* %b
   1030   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1031   %3 = icmp ugt <4 x i32> %1, %2
   1032   ; CHECK-DAG: clt_u.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
   1033   %4 = select <4 x i1> %3, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> %1
   1034   ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
   1035   ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
   1036   store <4 x i32> %4, <4 x i32>* %d
   1037   ; CHECK-DAG: st.w [[R4]], 0($4)
   1038 
   1039   ret void
   1040   ; CHECK: .size bseli_u_v4i32
   1041 }
   1042 
   1043 define void @bseli_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
   1044                         <2 x i64>* %c) nounwind {
          ; Per element, stores the constant 1 to %d where %a > %b (unsigned
          ; compare) and %a elsewhere. Expected code: clt_u.d with the compare
          ; operands reversed, ldi.d to build the splat of 1, then the
          ; register-form bsel.v. %c is declared but never used in the body.
   1045   ; CHECK: bseli_u_v2i64:
   1046 
   1047   %1 = load <2 x i64>, <2 x i64>* %a
   1048   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1049   %2 = load <2 x i64>, <2 x i64>* %b
   1050   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1051   %3 = icmp ugt <2 x i64> %1, %2
   1052   ; CHECK-DAG: clt_u.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
   1053   %4 = select <2 x i1> %3, <2 x i64> <i64 1, i64 1>, <2 x i64> %1
   1054   ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
   1055   ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
   1056   store <2 x i64> %4, <2 x i64>* %d
   1057   ; CHECK-DAG: st.d [[R4]], 0($4)
   1058 
   1059   ret void
   1060   ; CHECK: .size bseli_u_v2i64
   1061 }
   1062 
   1063 define void @max_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Per element, stores to %c whichever of %a and %b is greater under a
          ; signed compare (icmp sgt feeding a select of the same two operands).
          ; The compare/select pair is expected to become a single max_s.b.
   1064   ; CHECK: max_s_v16i8:
   1065 
   1066   %1 = load <16 x i8>, <16 x i8>* %a
   1067   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1068   %2 = load <16 x i8>, <16 x i8>* %b
   1069   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1070   %3 = icmp sgt <16 x i8> %1, %2
   1071   %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
   1072   ; CHECK-DAG: max_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1073   store <16 x i8> %4, <16 x i8>* %c
   1074   ; CHECK-DAG: st.b [[R3]], 0($4)
   1075 
   1076   ret void
   1077   ; CHECK: .size max_s_v16i8
   1078 }
   1079 
   1080 define void @max_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Per element, stores to %c whichever of %a and %b is greater under a
          ; signed compare (icmp sgt feeding a select of the same two operands).
          ; The compare/select pair is expected to become a single max_s.h.
   1081   ; CHECK: max_s_v8i16:
   1082 
   1083   %1 = load <8 x i16>, <8 x i16>* %a
   1084   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1085   %2 = load <8 x i16>, <8 x i16>* %b
   1086   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1087   %3 = icmp sgt <8 x i16> %1, %2
   1088   %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
   1089   ; CHECK-DAG: max_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1090   store <8 x i16> %4, <8 x i16>* %c
   1091   ; CHECK-DAG: st.h [[R3]], 0($4)
   1092 
   1093   ret void
   1094   ; CHECK: .size max_s_v8i16
   1095 }
   1096 
   1097 define void @max_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Per element, stores to %c whichever of %a and %b is greater under a
          ; signed compare (icmp sgt feeding a select of the same two operands).
          ; The compare/select pair is expected to become a single max_s.w.
   1098   ; CHECK: max_s_v4i32:
   1099 
   1100   %1 = load <4 x i32>, <4 x i32>* %a
   1101   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1102   %2 = load <4 x i32>, <4 x i32>* %b
   1103   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1104   %3 = icmp sgt <4 x i32> %1, %2
   1105   %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
   1106   ; CHECK-DAG: max_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1107   store <4 x i32> %4, <4 x i32>* %c
   1108   ; CHECK-DAG: st.w [[R3]], 0($4)
   1109 
   1110   ret void
   1111   ; CHECK: .size max_s_v4i32
   1112 }
   1113 
   1114 define void @max_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Per element, stores to %c whichever of %a and %b is greater under a
          ; signed compare (icmp sgt feeding a select of the same two operands).
          ; The compare/select pair is expected to become a single max_s.d.
   1115   ; CHECK: max_s_v2i64:
   1116 
   1117   %1 = load <2 x i64>, <2 x i64>* %a
   1118   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1119   %2 = load <2 x i64>, <2 x i64>* %b
   1120   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1121   %3 = icmp sgt <2 x i64> %1, %2
   1122   %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
   1123   ; CHECK-DAG: max_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1124   store <2 x i64> %4, <2 x i64>* %c
   1125   ; CHECK-DAG: st.d [[R3]], 0($4)
   1126 
   1127   ret void
   1128   ; CHECK: .size max_s_v2i64
   1129 }
   1130 
   1131 define void @max_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Per element, stores to %c whichever of %a and %b is greater under an
          ; unsigned compare (icmp ugt feeding a select of the same two operands).
          ; The compare/select pair is expected to become a single max_u.b.
   1132   ; CHECK: max_u_v16i8:
   1133 
   1134   %1 = load <16 x i8>, <16 x i8>* %a
   1135   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1136   %2 = load <16 x i8>, <16 x i8>* %b
   1137   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1138   %3 = icmp ugt <16 x i8> %1, %2
   1139   %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
   1140   ; CHECK-DAG: max_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1141   store <16 x i8> %4, <16 x i8>* %c
   1142   ; CHECK-DAG: st.b [[R3]], 0($4)
   1143 
   1144   ret void
   1145   ; CHECK: .size max_u_v16i8
   1146 }
   1147 
   1148 define void @max_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Per element, stores to %c whichever of %a and %b is greater under an
          ; unsigned compare (icmp ugt feeding a select of the same two operands).
          ; The compare/select pair is expected to become a single max_u.h.
   1149   ; CHECK: max_u_v8i16:
   1150 
   1151   %1 = load <8 x i16>, <8 x i16>* %a
   1152   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1153   %2 = load <8 x i16>, <8 x i16>* %b
   1154   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1155   %3 = icmp ugt <8 x i16> %1, %2
   1156   %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
   1157   ; CHECK-DAG: max_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1158   store <8 x i16> %4, <8 x i16>* %c
   1159   ; CHECK-DAG: st.h [[R3]], 0($4)
   1160 
   1161   ret void
   1162   ; CHECK: .size max_u_v8i16
   1163 }
   1164 
   1165 define void @max_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Per element, stores to %c whichever of %a and %b is greater under an
          ; unsigned compare (icmp ugt feeding a select of the same two operands).
          ; The compare/select pair is expected to become a single max_u.w.
   1166   ; CHECK: max_u_v4i32:
   1167 
   1168   %1 = load <4 x i32>, <4 x i32>* %a
   1169   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1170   %2 = load <4 x i32>, <4 x i32>* %b
   1171   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1172   %3 = icmp ugt <4 x i32> %1, %2
   1173   %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
   1174   ; CHECK-DAG: max_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1175   store <4 x i32> %4, <4 x i32>* %c
   1176   ; CHECK-DAG: st.w [[R3]], 0($4)
   1177 
   1178   ret void
   1179   ; CHECK: .size max_u_v4i32
   1180 }
   1181 
   1182 define void @max_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Per element, stores to %c whichever of %a and %b is greater under an
          ; unsigned compare (icmp ugt feeding a select of the same two operands).
          ; The compare/select pair is expected to become a single max_u.d.
   1183   ; CHECK: max_u_v2i64:
   1184 
   1185   %1 = load <2 x i64>, <2 x i64>* %a
   1186   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1187   %2 = load <2 x i64>, <2 x i64>* %b
   1188   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1189   %3 = icmp ugt <2 x i64> %1, %2
   1190   %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
   1191   ; CHECK-DAG: max_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1192   store <2 x i64> %4, <2 x i64>* %c
   1193   ; CHECK-DAG: st.d [[R3]], 0($4)
   1194 
   1195   ret void
   1196   ; CHECK: .size max_u_v2i64
   1197 }
   1198 
   1199 define void @max_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Non-strict variant of the signed maximum: icmp sge (rather than sgt)
          ; feeds a select of the same two operands, and the result goes to %c.
          ; The pair is still expected to become a single max_s.b.
   1200   ; CHECK: max_s_eq_v16i8:
   1201 
   1202   %1 = load <16 x i8>, <16 x i8>* %a
   1203   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1204   %2 = load <16 x i8>, <16 x i8>* %b
   1205   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1206   %3 = icmp sge <16 x i8> %1, %2
   1207   %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
   1208   ; CHECK-DAG: max_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1209   store <16 x i8> %4, <16 x i8>* %c
   1210   ; CHECK-DAG: st.b [[R3]], 0($4)
   1211 
   1212   ret void
   1213   ; CHECK: .size max_s_eq_v16i8
   1214 }
   1215 
   1216 define void @max_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Non-strict variant of the signed maximum: icmp sge (rather than sgt)
          ; feeds a select of the same two operands, and the result goes to %c.
          ; The pair is still expected to become a single max_s.h.
   1217   ; CHECK: max_s_eq_v8i16:
   1218 
   1219   %1 = load <8 x i16>, <8 x i16>* %a
   1220   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1221   %2 = load <8 x i16>, <8 x i16>* %b
   1222   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1223   %3 = icmp sge <8 x i16> %1, %2
   1224   %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
   1225   ; CHECK-DAG: max_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1226   store <8 x i16> %4, <8 x i16>* %c
   1227   ; CHECK-DAG: st.h [[R3]], 0($4)
   1228 
   1229   ret void
   1230   ; CHECK: .size max_s_eq_v8i16
   1231 }
   1232 
   1233 define void @max_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Non-strict variant of the signed maximum: icmp sge (rather than sgt)
          ; feeds a select of the same two operands, and the result goes to %c.
          ; The pair is still expected to become a single max_s.w.
   1234   ; CHECK: max_s_eq_v4i32:
   1235 
   1236   %1 = load <4 x i32>, <4 x i32>* %a
   1237   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1238   %2 = load <4 x i32>, <4 x i32>* %b
   1239   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1240   %3 = icmp sge <4 x i32> %1, %2
   1241   %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
   1242   ; CHECK-DAG: max_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1243   store <4 x i32> %4, <4 x i32>* %c
   1244   ; CHECK-DAG: st.w [[R3]], 0($4)
   1245 
   1246   ret void
   1247   ; CHECK: .size max_s_eq_v4i32
   1248 }
   1249 
   1250 define void @max_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Non-strict variant of the signed maximum: icmp sge (rather than sgt)
          ; feeds a select of the same two operands, and the result goes to %c.
          ; The pair is still expected to become a single max_s.d.
   1251   ; CHECK: max_s_eq_v2i64:
   1252 
   1253   %1 = load <2 x i64>, <2 x i64>* %a
   1254   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1255   %2 = load <2 x i64>, <2 x i64>* %b
   1256   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1257   %3 = icmp sge <2 x i64> %1, %2
   1258   %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
   1259   ; CHECK-DAG: max_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1260   store <2 x i64> %4, <2 x i64>* %c
   1261   ; CHECK-DAG: st.d [[R3]], 0($4)
   1262 
   1263   ret void
   1264   ; CHECK: .size max_s_eq_v2i64
   1265 }
   1266 
   1267 define void @max_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Non-strict variant of the unsigned maximum: icmp uge (rather than ugt)
          ; feeds a select of the same two operands, and the result goes to %c.
          ; The pair is still expected to become a single max_u.b.
   1268   ; CHECK: max_u_eq_v16i8:
   1269 
   1270   %1 = load <16 x i8>, <16 x i8>* %a
   1271   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1272   %2 = load <16 x i8>, <16 x i8>* %b
   1273   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1274   %3 = icmp uge <16 x i8> %1, %2
   1275   %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
   1276   ; CHECK-DAG: max_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1277   store <16 x i8> %4, <16 x i8>* %c
   1278   ; CHECK-DAG: st.b [[R3]], 0($4)
   1279 
   1280   ret void
   1281   ; CHECK: .size max_u_eq_v16i8
   1282 }
   1283 
   1284 define void @max_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Non-strict variant of the unsigned maximum: icmp uge (rather than ugt)
          ; feeds a select of the same two operands, and the result goes to %c.
          ; The pair is still expected to become a single max_u.h.
   1285   ; CHECK: max_u_eq_v8i16:
   1286 
   1287   %1 = load <8 x i16>, <8 x i16>* %a
   1288   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1289   %2 = load <8 x i16>, <8 x i16>* %b
   1290   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1291   %3 = icmp uge <8 x i16> %1, %2
   1292   %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
   1293   ; CHECK-DAG: max_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1294   store <8 x i16> %4, <8 x i16>* %c
   1295   ; CHECK-DAG: st.h [[R3]], 0($4)
   1296 
   1297   ret void
   1298   ; CHECK: .size max_u_eq_v8i16
   1299 }
   1300 
   1301 define void @max_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Non-strict variant of the unsigned maximum: icmp uge (rather than ugt)
          ; feeds a select of the same two operands, and the result goes to %c.
          ; The pair is still expected to become a single max_u.w.
   1302   ; CHECK: max_u_eq_v4i32:
   1303 
   1304   %1 = load <4 x i32>, <4 x i32>* %a
   1305   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1306   %2 = load <4 x i32>, <4 x i32>* %b
   1307   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1308   %3 = icmp uge <4 x i32> %1, %2
   1309   %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
   1310   ; CHECK-DAG: max_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1311   store <4 x i32> %4, <4 x i32>* %c
   1312   ; CHECK-DAG: st.w [[R3]], 0($4)
   1313 
   1314   ret void
   1315   ; CHECK: .size max_u_eq_v4i32
   1316 }
   1317 
   1318 define void @max_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Non-strict variant of the unsigned maximum: icmp uge (rather than ugt)
          ; feeds a select of the same two operands, and the result goes to %c.
          ; The pair is still expected to become a single max_u.d.
   1319   ; CHECK: max_u_eq_v2i64:
   1320 
   1321   %1 = load <2 x i64>, <2 x i64>* %a
   1322   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1323   %2 = load <2 x i64>, <2 x i64>* %b
   1324   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1325   %3 = icmp uge <2 x i64> %1, %2
   1326   %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
   1327   ; CHECK-DAG: max_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1328   store <2 x i64> %4, <2 x i64>* %c
   1329   ; CHECK-DAG: st.d [[R3]], 0($4)
   1330 
   1331   ret void
   1332   ; CHECK: .size max_u_eq_v2i64
   1333 }
   1334 
   1335 define void @maxi_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Per element, stores to %c the greater of %a and the constant 1 under a
          ; signed compare (icmp sgt against a splat of 1 feeding a select with
          ; the same splat). Expected code: a single maxi_s.b with immediate 1.
   1336   ; CHECK: maxi_s_v16i8:
   1337 
   1338   %1 = load <16 x i8>, <16 x i8>* %a
   1339   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1340   %2 = icmp sgt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1341   %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1342   ; CHECK-DAG: maxi_s.b [[R3:\$w[0-9]+]], [[R1]], 1
   1343   store <16 x i8> %3, <16 x i8>* %c
   1344   ; CHECK-DAG: st.b [[R3]], 0($4)
   1345 
   1346   ret void
   1347   ; CHECK: .size maxi_s_v16i8
   1348 }
   1349 
   1350 define void @maxi_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Per element, stores to %c the greater of %a and the constant 1 under a
          ; signed compare (icmp sgt against a splat of 1 feeding a select with
          ; the same splat). Expected code: a single maxi_s.h with immediate 1.
   1351   ; CHECK: maxi_s_v8i16:
   1352 
   1353   %1 = load <8 x i16>, <8 x i16>* %a
   1354   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1355   %2 = icmp sgt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1356   %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1357   ; CHECK-DAG: maxi_s.h [[R3:\$w[0-9]+]], [[R1]], 1
   1358   store <8 x i16> %3, <8 x i16>* %c
   1359   ; CHECK-DAG: st.h [[R3]], 0($4)
   1360 
   1361   ret void
   1362   ; CHECK: .size maxi_s_v8i16
   1363 }
   1364 
   1365 define void @maxi_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Per element, stores to %c the greater of %a and the constant 1 under a
          ; signed compare (icmp sgt against a splat of 1 feeding a select with
          ; the same splat). Expected code: a single maxi_s.w with immediate 1.
   1366   ; CHECK: maxi_s_v4i32:
   1367 
   1368   %1 = load <4 x i32>, <4 x i32>* %a
   1369   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1370   %2 = icmp sgt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   1371   %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   1372   ; CHECK-DAG: maxi_s.w [[R3:\$w[0-9]+]], [[R1]], 1
   1373   store <4 x i32> %3, <4 x i32>* %c
   1374   ; CHECK-DAG: st.w [[R3]], 0($4)
   1375 
   1376   ret void
   1377   ; CHECK: .size maxi_s_v4i32
   1378 }
   1379 
   1380 define void @maxi_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Per element, stores to %c the greater of %a and the constant 1 under a
          ; signed compare (icmp sgt against a splat of 1 feeding a select with
          ; the same splat). Expected code: a single maxi_s.d with immediate 1.
   1381   ; CHECK: maxi_s_v2i64:
   1382 
   1383   %1 = load <2 x i64>, <2 x i64>* %a
   1384   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1385   %2 = icmp sgt <2 x i64> %1, <i64 1, i64 1>
   1386   %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
   1387   ; CHECK-DAG: maxi_s.d [[R3:\$w[0-9]+]], [[R1]], 1
   1388   store <2 x i64> %3, <2 x i64>* %c
   1389   ; CHECK-DAG: st.d [[R3]], 0($4)
   1390 
   1391   ret void
   1392   ; CHECK: .size maxi_s_v2i64
   1393 }
   1394 
   1395 define void @maxi_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Per element, stores to %c the greater of %a and the constant 1 under an
          ; unsigned compare (icmp ugt against a splat of 1 feeding a select with
          ; the same splat). Expected code: a single maxi_u.b with immediate 1.
   1396   ; CHECK: maxi_u_v16i8:
   1397 
   1398   %1 = load <16 x i8>, <16 x i8>* %a
   1399   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1400   %2 = icmp ugt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1401   %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1402   ; CHECK-DAG: maxi_u.b [[R3:\$w[0-9]+]], [[R1]], 1
   1403   store <16 x i8> %3, <16 x i8>* %c
   1404   ; CHECK-DAG: st.b [[R3]], 0($4)
   1405 
   1406   ret void
   1407   ; CHECK: .size maxi_u_v16i8
   1408 }
   1409 
   1410 define void @maxi_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Per element, stores to %c the greater of %a and the constant 1 under an
          ; unsigned compare (icmp ugt against a splat of 1 feeding a select with
          ; the same splat). Expected code: a single maxi_u.h with immediate 1.
   1411   ; CHECK: maxi_u_v8i16:
   1412 
   1413   %1 = load <8 x i16>, <8 x i16>* %a
   1414   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1415   %2 = icmp ugt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1416   %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1417   ; CHECK-DAG: maxi_u.h [[R3:\$w[0-9]+]], [[R1]], 1
   1418   store <8 x i16> %3, <8 x i16>* %c
   1419   ; CHECK-DAG: st.h [[R3]], 0($4)
   1420 
   1421   ret void
   1422   ; CHECK: .size maxi_u_v8i16
   1423 }
   1424 
   1425 define void @maxi_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Per element, stores to %c the greater of %a and the constant 1 under an
          ; unsigned compare (icmp ugt against a splat of 1 feeding a select with
          ; the same splat). Expected code: a single maxi_u.w with immediate 1.
   1426   ; CHECK: maxi_u_v4i32:
   1427 
   1428   %1 = load <4 x i32>, <4 x i32>* %a
   1429   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1430   %2 = icmp ugt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   1431   %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   1432   ; CHECK-DAG: maxi_u.w [[R3:\$w[0-9]+]], [[R1]], 1
   1433   store <4 x i32> %3, <4 x i32>* %c
   1434   ; CHECK-DAG: st.w [[R3]], 0($4)
   1435 
   1436   ret void
   1437   ; CHECK: .size maxi_u_v4i32
   1438 }
   1439 
   1440 define void @maxi_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Per element, stores to %c the greater of %a and the constant 1 under an
          ; unsigned compare (icmp ugt against a splat of 1 feeding a select with
          ; the same splat). Expected code: a single maxi_u.d with immediate 1.
   1441   ; CHECK: maxi_u_v2i64:
   1442 
   1443   %1 = load <2 x i64>, <2 x i64>* %a
   1444   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1445   %2 = icmp ugt <2 x i64> %1, <i64 1, i64 1>
   1446   %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
   1447   ; CHECK-DAG: maxi_u.d [[R3:\$w[0-9]+]], [[R1]], 1
   1448   store <2 x i64> %3, <2 x i64>* %c
   1449   ; CHECK-DAG: st.d [[R3]], 0($4)
   1450 
   1451   ret void
   1452   ; CHECK: .size maxi_u_v2i64
   1453 }
   1454 
   1455 define void @maxi_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Non-strict variant of the signed maximum against a constant: icmp sge
          ; (rather than sgt) against a splat of 1 feeds a select with the same
          ; splat. Expected code is still a single maxi_s.b with immediate 1.
   1456   ; CHECK: maxi_s_eq_v16i8:
   1457 
   1458   %1 = load <16 x i8>, <16 x i8>* %a
   1459   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1460   %2 = icmp sge <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1461   %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1462   ; CHECK-DAG: maxi_s.b [[R3:\$w[0-9]+]], [[R1]], 1
   1463   store <16 x i8> %3, <16 x i8>* %c
   1464   ; CHECK-DAG: st.b [[R3]], 0($4)
   1465 
   1466   ret void
   1467   ; CHECK: .size maxi_s_eq_v16i8
   1468 }
   1469 
   1470 define void @maxi_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Non-strict variant of the signed maximum against a constant: icmp sge
          ; (rather than sgt) against a splat of 1 feeds a select with the same
          ; splat. Expected code is still a single maxi_s.h with immediate 1.
   1471   ; CHECK: maxi_s_eq_v8i16:
   1472 
   1473   %1 = load <8 x i16>, <8 x i16>* %a
   1474   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1475   %2 = icmp sge <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1476   %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1477   ; CHECK-DAG: maxi_s.h [[R3:\$w[0-9]+]], [[R1]], 1
   1478   store <8 x i16> %3, <8 x i16>* %c
   1479   ; CHECK-DAG: st.h [[R3]], 0($4)
   1480 
   1481   ret void
   1482   ; CHECK: .size maxi_s_eq_v8i16
   1483 }
   1484 
   1485 define void @maxi_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Non-strict variant of the signed maximum against a constant: icmp sge
          ; (rather than sgt) against a splat of 1 feeds a select with the same
          ; splat. Expected code is still a single maxi_s.w with immediate 1.
   1486   ; CHECK: maxi_s_eq_v4i32:
   1487 
   1488   %1 = load <4 x i32>, <4 x i32>* %a
   1489   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1490   %2 = icmp sge <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   1491   %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   1492   ; CHECK-DAG: maxi_s.w [[R3:\$w[0-9]+]], [[R1]], 1
   1493   store <4 x i32> %3, <4 x i32>* %c
   1494   ; CHECK-DAG: st.w [[R3]], 0($4)
   1495 
   1496   ret void
   1497   ; CHECK: .size maxi_s_eq_v4i32
   1498 }
   1499 
   1500 define void @maxi_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Non-strict variant of the signed maximum against a constant: icmp sge
          ; (rather than sgt) against a splat of 1 feeds a select with the same
          ; splat. Expected code is still a single maxi_s.d with immediate 1.
   1501   ; CHECK: maxi_s_eq_v2i64:
   1502 
   1503   %1 = load <2 x i64>, <2 x i64>* %a
   1504   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1505   %2 = icmp sge <2 x i64> %1, <i64 1, i64 1>
   1506   %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
   1507   ; CHECK-DAG: maxi_s.d [[R3:\$w[0-9]+]], [[R1]], 1
   1508   store <2 x i64> %3, <2 x i64>* %c
   1509   ; CHECK-DAG: st.d [[R3]], 0($4)
   1510 
   1511   ret void
   1512   ; CHECK: .size maxi_s_eq_v2i64
   1513 }
   1514 
   1515 define void @maxi_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Non-strict variant of the unsigned maximum against a constant: icmp uge
          ; (rather than ugt) against a splat of 1 feeds a select with the same
          ; splat. Expected code is still a single maxi_u.b with immediate 1.
   1516   ; CHECK: maxi_u_eq_v16i8:
   1517 
   1518   %1 = load <16 x i8>, <16 x i8>* %a
   1519   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1520   %2 = icmp uge <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1521   %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1522   ; CHECK-DAG: maxi_u.b [[R3:\$w[0-9]+]], [[R1]], 1
   1523   store <16 x i8> %3, <16 x i8>* %c
   1524   ; CHECK-DAG: st.b [[R3]], 0($4)
   1525 
   1526   ret void
   1527   ; CHECK: .size maxi_u_eq_v16i8
   1528 }
   1529 
   1530 define void @maxi_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Non-strict variant of the unsigned maximum against a constant: icmp uge
          ; (rather than ugt) against a splat of 1 feeds a select with the same
          ; splat. Expected code is still a single maxi_u.h with immediate 1.
   1531   ; CHECK: maxi_u_eq_v8i16:
   1532 
   1533   %1 = load <8 x i16>, <8 x i16>* %a
   1534   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1535   %2 = icmp uge <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1536   %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1537   ; CHECK-DAG: maxi_u.h [[R3:\$w[0-9]+]], [[R1]], 1
   1538   store <8 x i16> %3, <8 x i16>* %c
   1539   ; CHECK-DAG: st.h [[R3]], 0($4)
   1540 
   1541   ret void
   1542   ; CHECK: .size maxi_u_eq_v8i16
   1543 }
   1544 
   1545 define void @maxi_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Non-strict variant of the unsigned maximum against a constant: icmp uge
          ; (rather than ugt) against a splat of 1 feeds a select with the same
          ; splat. Expected code is still a single maxi_u.w with immediate 1.
   1546   ; CHECK: maxi_u_eq_v4i32:
   1547 
   1548   %1 = load <4 x i32>, <4 x i32>* %a
   1549   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1550   %2 = icmp uge <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   1551   %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   1552   ; CHECK-DAG: maxi_u.w [[R3:\$w[0-9]+]], [[R1]], 1
   1553   store <4 x i32> %3, <4 x i32>* %c
   1554   ; CHECK-DAG: st.w [[R3]], 0($4)
   1555 
   1556   ret void
   1557   ; CHECK: .size maxi_u_eq_v4i32
   1558 }
   1559 
   1560 define void @maxi_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Non-strict variant of the unsigned maximum against a constant: icmp uge
          ; (rather than ugt) against a splat of 1 feeds a select with the same
          ; splat. Expected code is still a single maxi_u.d with immediate 1.
   1561   ; CHECK: maxi_u_eq_v2i64:
   1562 
   1563   %1 = load <2 x i64>, <2 x i64>* %a
   1564   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1565   %2 = icmp uge <2 x i64> %1, <i64 1, i64 1>
   1566   %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
   1567   ; CHECK-DAG: maxi_u.d [[R3:\$w[0-9]+]], [[R1]], 1
   1568   store <2 x i64> %3, <2 x i64>* %c
   1569   ; CHECK-DAG: st.d [[R3]], 0($4)
   1570 
   1571   ret void
   1572   ; CHECK: .size maxi_u_eq_v2i64
   1573 }
   1574 
   1575 define void @min_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Per element, stores to %c whichever of %a and %b is smaller under a
          ; signed compare (icmp slt feeding a select of the same two operands).
          ; The compare/select pair is expected to become a single min_s.b.
          ; The predicate is the strict slt, matching the other min_s_* element
          ; widths, so the strict form is exercised for byte elements as well.
   1576   ; CHECK: min_s_v16i8:
   1577 
   1578   %1 = load <16 x i8>, <16 x i8>* %a
   1579   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1580   %2 = load <16 x i8>, <16 x i8>* %b
   1581   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1582   %3 = icmp slt <16 x i8> %1, %2
   1583   %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
   1584   ; CHECK-DAG: min_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1585   store <16 x i8> %4, <16 x i8>* %c
   1586   ; CHECK-DAG: st.b [[R3]], 0($4)
   1587 
   1588   ret void
   1589   ; CHECK: .size min_s_v16i8
   1590 }
   1591 
   1592 define void @min_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   1593   ; CHECK: min_s_v8i16:
   1594   ; Signed min of two loaded vectors, expressed as icmp slt + select.
   1595   %lhs = load <8 x i16>, <8 x i16>* %a
   1596   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1597   %rhs = load <8 x i16>, <8 x i16>* %b
   1598   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1599   %is_lt = icmp slt <8 x i16> %lhs, %rhs
   1600   %min = select <8 x i1> %is_lt, <8 x i16> %lhs, <8 x i16> %rhs
   1601   ; CHECK-DAG: min_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1602   store <8 x i16> %min, <8 x i16>* %c
   1603   ; CHECK-DAG: st.h [[R3]], 0($4)
   1604 
   1605   ret void
   1606   ; CHECK: .size min_s_v8i16
   1607 }
   1608 
   1609 define void @min_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   1610   ; CHECK: min_s_v4i32:
   1611   ; Signed min of two loaded vectors, expressed as icmp slt + select.
   1612   %lhs = load <4 x i32>, <4 x i32>* %a
   1613   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1614   %rhs = load <4 x i32>, <4 x i32>* %b
   1615   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1616   %is_lt = icmp slt <4 x i32> %lhs, %rhs
   1617   %min = select <4 x i1> %is_lt, <4 x i32> %lhs, <4 x i32> %rhs
   1618   ; CHECK-DAG: min_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1619   store <4 x i32> %min, <4 x i32>* %c
   1620   ; CHECK-DAG: st.w [[R3]], 0($4)
   1621 
   1622   ret void
   1623   ; CHECK: .size min_s_v4i32
   1624 }
   1625 
   1626 define void @min_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   1627   ; CHECK: min_s_v2i64:
   1628   ; Signed min of two loaded vectors, expressed as icmp slt + select.
   1629   %lhs = load <2 x i64>, <2 x i64>* %a
   1630   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1631   %rhs = load <2 x i64>, <2 x i64>* %b
   1632   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1633   %is_lt = icmp slt <2 x i64> %lhs, %rhs
   1634   %min = select <2 x i1> %is_lt, <2 x i64> %lhs, <2 x i64> %rhs
   1635   ; CHECK-DAG: min_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1636   store <2 x i64> %min, <2 x i64>* %c
   1637   ; CHECK-DAG: st.d [[R3]], 0($4)
   1638 
   1639   ret void
   1640   ; CHECK: .size min_s_v2i64
   1641 }
   1642 
   1643 define void @min_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   1644   ; CHECK: min_u_v16i8:
   1645   ; Unsigned min of two loaded vectors, expressed as icmp ult + select.
   1646   %lhs = load <16 x i8>, <16 x i8>* %a
   1647   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1648   %rhs = load <16 x i8>, <16 x i8>* %b
   1649   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1650   %is_lt = icmp ult <16 x i8> %lhs, %rhs
   1651   %min = select <16 x i1> %is_lt, <16 x i8> %lhs, <16 x i8> %rhs
   1652   ; CHECK-DAG: min_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1653   store <16 x i8> %min, <16 x i8>* %c
   1654   ; CHECK-DAG: st.b [[R3]], 0($4)
   1655 
   1656   ret void
   1657   ; CHECK: .size min_u_v16i8
   1658 }
   1659 
   1660 define void @min_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   1661   ; CHECK: min_u_v8i16:
   1662   ; Unsigned min of two loaded vectors, expressed as icmp ult + select.
   1663   %lhs = load <8 x i16>, <8 x i16>* %a
   1664   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1665   %rhs = load <8 x i16>, <8 x i16>* %b
   1666   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1667   %is_lt = icmp ult <8 x i16> %lhs, %rhs
   1668   %min = select <8 x i1> %is_lt, <8 x i16> %lhs, <8 x i16> %rhs
   1669   ; CHECK-DAG: min_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1670   store <8 x i16> %min, <8 x i16>* %c
   1671   ; CHECK-DAG: st.h [[R3]], 0($4)
   1672 
   1673   ret void
   1674   ; CHECK: .size min_u_v8i16
   1675 }
   1676 
   1677 define void @min_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   1678   ; CHECK: min_u_v4i32:
   1679   ; Unsigned min of two loaded vectors, expressed as icmp ult + select.
   1680   %lhs = load <4 x i32>, <4 x i32>* %a
   1681   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1682   %rhs = load <4 x i32>, <4 x i32>* %b
   1683   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1684   %is_lt = icmp ult <4 x i32> %lhs, %rhs
   1685   %min = select <4 x i1> %is_lt, <4 x i32> %lhs, <4 x i32> %rhs
   1686   ; CHECK-DAG: min_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1687   store <4 x i32> %min, <4 x i32>* %c
   1688   ; CHECK-DAG: st.w [[R3]], 0($4)
   1689 
   1690   ret void
   1691   ; CHECK: .size min_u_v4i32
   1692 }
   1693 
   1694 define void @min_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   1695   ; CHECK: min_u_v2i64:
   1696   ; Unsigned min of two loaded vectors, expressed as icmp ult + select.
   1697   %lhs = load <2 x i64>, <2 x i64>* %a
   1698   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1699   %rhs = load <2 x i64>, <2 x i64>* %b
   1700   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1701   %is_lt = icmp ult <2 x i64> %lhs, %rhs
   1702   %min = select <2 x i1> %is_lt, <2 x i64> %lhs, <2 x i64> %rhs
   1703   ; CHECK-DAG: min_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1704   store <2 x i64> %min, <2 x i64>* %c
   1705   ; CHECK-DAG: st.d [[R3]], 0($4)
   1706 
   1707   ret void
   1708   ; CHECK: .size min_u_v2i64
   1709 }
   1710 
   1711 define void @min_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   1712   ; CHECK: min_s_eq_v16i8:
   1713   ; Signed min of two loaded vectors, expressed as icmp sle + select.
   1714   %lhs = load <16 x i8>, <16 x i8>* %a
   1715   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1716   %rhs = load <16 x i8>, <16 x i8>* %b
   1717   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1718   %is_le = icmp sle <16 x i8> %lhs, %rhs
   1719   %min = select <16 x i1> %is_le, <16 x i8> %lhs, <16 x i8> %rhs
   1720   ; CHECK-DAG: min_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1721   store <16 x i8> %min, <16 x i8>* %c
   1722   ; CHECK-DAG: st.b [[R3]], 0($4)
   1723 
   1724   ret void
   1725   ; CHECK: .size min_s_eq_v16i8
   1726 }
   1727 
   1728 define void @min_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   1729   ; CHECK: min_s_eq_v8i16:
   1730   ; Signed min of two loaded vectors, expressed as icmp sle + select.
   1731   %lhs = load <8 x i16>, <8 x i16>* %a
   1732   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1733   %rhs = load <8 x i16>, <8 x i16>* %b
   1734   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1735   %is_le = icmp sle <8 x i16> %lhs, %rhs
   1736   %min = select <8 x i1> %is_le, <8 x i16> %lhs, <8 x i16> %rhs
   1737   ; CHECK-DAG: min_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1738   store <8 x i16> %min, <8 x i16>* %c
   1739   ; CHECK-DAG: st.h [[R3]], 0($4)
   1740 
   1741   ret void
   1742   ; CHECK: .size min_s_eq_v8i16
   1743 }
   1744 
   1745 define void @min_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   1746   ; CHECK: min_s_eq_v4i32:
   1747   ; Signed min of two loaded vectors, expressed as icmp sle + select.
   1748   %lhs = load <4 x i32>, <4 x i32>* %a
   1749   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1750   %rhs = load <4 x i32>, <4 x i32>* %b
   1751   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1752   %is_le = icmp sle <4 x i32> %lhs, %rhs
   1753   %min = select <4 x i1> %is_le, <4 x i32> %lhs, <4 x i32> %rhs
   1754   ; CHECK-DAG: min_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1755   store <4 x i32> %min, <4 x i32>* %c
   1756   ; CHECK-DAG: st.w [[R3]], 0($4)
   1757 
   1758   ret void
   1759   ; CHECK: .size min_s_eq_v4i32
   1760 }
   1761 
   1762 define void @min_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   1763   ; CHECK: min_s_eq_v2i64:
   1764   ; Signed min of two loaded vectors, expressed as icmp sle + select.
   1765   %lhs = load <2 x i64>, <2 x i64>* %a
   1766   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1767   %rhs = load <2 x i64>, <2 x i64>* %b
   1768   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1769   %is_le = icmp sle <2 x i64> %lhs, %rhs
   1770   %min = select <2 x i1> %is_le, <2 x i64> %lhs, <2 x i64> %rhs
   1771   ; CHECK-DAG: min_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1772   store <2 x i64> %min, <2 x i64>* %c
   1773   ; CHECK-DAG: st.d [[R3]], 0($4)
   1774 
   1775   ret void
   1776   ; CHECK: .size min_s_eq_v2i64
   1777 }
   1778 
   1779 define void @min_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   1780   ; CHECK: min_u_eq_v16i8:
   1781   ; Unsigned min of two loaded vectors, expressed as icmp ule + select.
   1782   %lhs = load <16 x i8>, <16 x i8>* %a
   1783   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1784   %rhs = load <16 x i8>, <16 x i8>* %b
   1785   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1786   %is_le = icmp ule <16 x i8> %lhs, %rhs
   1787   %min = select <16 x i1> %is_le, <16 x i8> %lhs, <16 x i8> %rhs
   1788   ; CHECK-DAG: min_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1789   store <16 x i8> %min, <16 x i8>* %c
   1790   ; CHECK-DAG: st.b [[R3]], 0($4)
   1791 
   1792   ret void
   1793   ; CHECK: .size min_u_eq_v16i8
   1794 }
   1795 
   1796 define void @min_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   1797   ; CHECK: min_u_eq_v8i16:
   1798   ; Unsigned min of two loaded vectors, expressed as icmp ule + select.
   1799   %lhs = load <8 x i16>, <8 x i16>* %a
   1800   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1801   %rhs = load <8 x i16>, <8 x i16>* %b
   1802   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1803   %is_le = icmp ule <8 x i16> %lhs, %rhs
   1804   %min = select <8 x i1> %is_le, <8 x i16> %lhs, <8 x i16> %rhs
   1805   ; CHECK-DAG: min_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1806   store <8 x i16> %min, <8 x i16>* %c
   1807   ; CHECK-DAG: st.h [[R3]], 0($4)
   1808 
   1809   ret void
   1810   ; CHECK: .size min_u_eq_v8i16
   1811 }
   1812 
   1813 define void @min_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   1814   ; CHECK: min_u_eq_v4i32:
   1815   ; Unsigned min of two loaded vectors, expressed as icmp ule + select.
   1816   %lhs = load <4 x i32>, <4 x i32>* %a
   1817   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1818   %rhs = load <4 x i32>, <4 x i32>* %b
   1819   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1820   %is_le = icmp ule <4 x i32> %lhs, %rhs
   1821   %min = select <4 x i1> %is_le, <4 x i32> %lhs, <4 x i32> %rhs
   1822   ; CHECK-DAG: min_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1823   store <4 x i32> %min, <4 x i32>* %c
   1824   ; CHECK-DAG: st.w [[R3]], 0($4)
   1825 
   1826   ret void
   1827   ; CHECK: .size min_u_eq_v4i32
   1828 }
   1829 
   1830 define void @min_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   1831   ; CHECK: min_u_eq_v2i64:
   1832   ; Unsigned min of two loaded vectors, expressed as icmp ule + select.
   1833   %lhs = load <2 x i64>, <2 x i64>* %a
   1834   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1835   %rhs = load <2 x i64>, <2 x i64>* %b
   1836   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1837   %is_le = icmp ule <2 x i64> %lhs, %rhs
   1838   %min = select <2 x i1> %is_le, <2 x i64> %lhs, <2 x i64> %rhs
   1839   ; CHECK-DAG: min_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1840   store <2 x i64> %min, <2 x i64>* %c
   1841   ; CHECK-DAG: st.d [[R3]], 0($4)
   1842 
   1843   ret void
   1844   ; CHECK: .size min_u_eq_v2i64
   1845 }
   1846 
   1847 define void @mini_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   1848   ; CHECK: mini_s_v16i8:
   1849   ; Signed min against a splat of 1 (icmp slt + select); immediate form expected.
   1850   %vec = load <16 x i8>, <16 x i8>* %a
   1851   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1852   %is_lt = icmp slt <16 x i8> %vec, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1853   %min = select <16 x i1> %is_lt, <16 x i8> %vec, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1854   ; CHECK-DAG: mini_s.b [[R3:\$w[0-9]+]], [[R1]], 1
   1855   store <16 x i8> %min, <16 x i8>* %c
   1856   ; CHECK-DAG: st.b [[R3]], 0($4)
   1857 
   1858   ret void
   1859   ; CHECK: .size mini_s_v16i8
   1860 }
   1861 
   1862 define void @mini_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   1863   ; CHECK: mini_s_v8i16:
   1864   ; Signed min against a splat of 1 (icmp slt + select); immediate form expected.
   1865   %vec = load <8 x i16>, <8 x i16>* %a
   1866   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1867   %is_lt = icmp slt <8 x i16> %vec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1868   %min = select <8 x i1> %is_lt, <8 x i16> %vec, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1869   ; CHECK-DAG: mini_s.h [[R3:\$w[0-9]+]], [[R1]], 1
   1870   store <8 x i16> %min, <8 x i16>* %c
   1871   ; CHECK-DAG: st.h [[R3]], 0($4)
   1872 
   1873   ret void
   1874   ; CHECK: .size mini_s_v8i16
   1875 }
   1876 
   1877 define void @mini_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   1878   ; CHECK: mini_s_v4i32:
   1879   ; Signed min against a splat of 1 (icmp slt + select); immediate form expected.
   1880   %vec = load <4 x i32>, <4 x i32>* %a
   1881   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1882   %is_lt = icmp slt <4 x i32> %vec, <i32 1, i32 1, i32 1, i32 1>
   1883   %min = select <4 x i1> %is_lt, <4 x i32> %vec, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   1884   ; CHECK-DAG: mini_s.w [[R3:\$w[0-9]+]], [[R1]], 1
   1885   store <4 x i32> %min, <4 x i32>* %c
   1886   ; CHECK-DAG: st.w [[R3]], 0($4)
   1887 
   1888   ret void
   1889   ; CHECK: .size mini_s_v4i32
   1890 }
   1891 
   1892 define void @mini_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   1893   ; CHECK: mini_s_v2i64:
   1894   ; Signed min against a splat of 1 (icmp slt + select); immediate form expected.
   1895   %vec = load <2 x i64>, <2 x i64>* %a
   1896   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1897   %is_lt = icmp slt <2 x i64> %vec, <i64 1, i64 1>
   1898   %min = select <2 x i1> %is_lt, <2 x i64> %vec, <2 x i64> <i64 1, i64 1>
   1899   ; CHECK-DAG: mini_s.d [[R3:\$w[0-9]+]], [[R1]], 1
   1900   store <2 x i64> %min, <2 x i64>* %c
   1901   ; CHECK-DAG: st.d [[R3]], 0($4)
   1902 
   1903   ret void
   1904   ; CHECK: .size mini_s_v2i64
   1905 }
   1906 
   1907 define void @mini_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   1908   ; CHECK: mini_u_v16i8:
   1909   ; Unsigned min against a splat of 1 (icmp ult + select); immediate form expected.
   1910   %vec = load <16 x i8>, <16 x i8>* %a
   1911   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1912   %is_lt = icmp ult <16 x i8> %vec, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1913   %min = select <16 x i1> %is_lt, <16 x i8> %vec, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1914   ; CHECK-DAG: mini_u.b [[R3:\$w[0-9]+]], [[R1]], 1
   1915   store <16 x i8> %min, <16 x i8>* %c
   1916   ; CHECK-DAG: st.b [[R3]], 0($4)
   1917 
   1918   ret void
   1919   ; CHECK: .size mini_u_v16i8
   1920 }
   1921 
   1922 define void @mini_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   1923   ; CHECK: mini_u_v8i16:
   1924   ; Unsigned min against a splat of 1 (icmp ult + select); immediate form expected.
   1925   %vec = load <8 x i16>, <8 x i16>* %a
   1926   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1927   %is_lt = icmp ult <8 x i16> %vec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1928   %min = select <8 x i1> %is_lt, <8 x i16> %vec, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1929   ; CHECK-DAG: mini_u.h [[R3:\$w[0-9]+]], [[R1]], 1
   1930   store <8 x i16> %min, <8 x i16>* %c
   1931   ; CHECK-DAG: st.h [[R3]], 0($4)
   1932 
   1933   ret void
   1934   ; CHECK: .size mini_u_v8i16
   1935 }
   1936 
   1937 define void @mini_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   1938   ; CHECK: mini_u_v4i32:
   1939   ; Unsigned min against a splat of 1 (icmp ult + select); immediate form expected.
   1940   %vec = load <4 x i32>, <4 x i32>* %a
   1941   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1942   %is_lt = icmp ult <4 x i32> %vec, <i32 1, i32 1, i32 1, i32 1>
   1943   %min = select <4 x i1> %is_lt, <4 x i32> %vec, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   1944   ; CHECK-DAG: mini_u.w [[R3:\$w[0-9]+]], [[R1]], 1
   1945   store <4 x i32> %min, <4 x i32>* %c
   1946   ; CHECK-DAG: st.w [[R3]], 0($4)
   1947 
   1948   ret void
   1949   ; CHECK: .size mini_u_v4i32
   1950 }
   1951 
   1952 define void @mini_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   1953   ; CHECK: mini_u_v2i64:
   1954   ; Unsigned min against a splat of 1 (icmp ult + select); immediate form expected.
   1955   %vec = load <2 x i64>, <2 x i64>* %a
   1956   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1957   %is_lt = icmp ult <2 x i64> %vec, <i64 1, i64 1>
   1958   %min = select <2 x i1> %is_lt, <2 x i64> %vec, <2 x i64> <i64 1, i64 1>
   1959   ; CHECK-DAG: mini_u.d [[R3:\$w[0-9]+]], [[R1]], 1
   1960   store <2 x i64> %min, <2 x i64>* %c
   1961   ; CHECK-DAG: st.d [[R3]], 0($4)
   1962 
   1963   ret void
   1964   ; CHECK: .size mini_u_v2i64
   1965 }
   1966 
   1967 define void @mini_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   1968   ; CHECK: mini_s_eq_v16i8:
   1969   ; Signed min against a splat of 1 (icmp sle + select); immediate form expected.
   1970   %vec = load <16 x i8>, <16 x i8>* %a
   1971   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1972   %is_le = icmp sle <16 x i8> %vec, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1973   %min = select <16 x i1> %is_le, <16 x i8> %vec, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1974   ; CHECK-DAG: mini_s.b [[R3:\$w[0-9]+]], [[R1]], 1
   1975   store <16 x i8> %min, <16 x i8>* %c
   1976   ; CHECK-DAG: st.b [[R3]], 0($4)
   1977 
   1978   ret void
   1979   ; CHECK: .size mini_s_eq_v16i8
   1980 }
   1981 
   1982 define void @mini_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   1983   ; CHECK: mini_s_eq_v8i16:
   1984   ; Signed min against a splat of 1 (icmp sle + select); immediate form expected.
   1985   %vec = load <8 x i16>, <8 x i16>* %a
   1986   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1987   %is_le = icmp sle <8 x i16> %vec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1988   %min = select <8 x i1> %is_le, <8 x i16> %vec, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1989   ; CHECK-DAG: mini_s.h [[R3:\$w[0-9]+]], [[R1]], 1
   1990   store <8 x i16> %min, <8 x i16>* %c
   1991   ; CHECK-DAG: st.h [[R3]], 0($4)
   1992 
   1993   ret void
   1994   ; CHECK: .size mini_s_eq_v8i16
   1995 }
   1996 
   1997 define void @mini_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   1998   ; CHECK: mini_s_eq_v4i32:
   1999   ; Signed min against a splat of 1 (icmp sle + select); immediate form expected.
   2000   %vec = load <4 x i32>, <4 x i32>* %a
   2001   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   2002   %is_le = icmp sle <4 x i32> %vec, <i32 1, i32 1, i32 1, i32 1>
   2003   %min = select <4 x i1> %is_le, <4 x i32> %vec, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   2004   ; CHECK-DAG: mini_s.w [[R3:\$w[0-9]+]], [[R1]], 1
   2005   store <4 x i32> %min, <4 x i32>* %c
   2006   ; CHECK-DAG: st.w [[R3]], 0($4)
   2007 
   2008   ret void
   2009   ; CHECK: .size mini_s_eq_v4i32
   2010 }
   2011 
   2012 define void @mini_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   2013   ; CHECK: mini_s_eq_v2i64:
   2014   ; Signed min against a splat of 1 (icmp sle + select); immediate form expected.
   2015   %vec = load <2 x i64>, <2 x i64>* %a
   2016   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   2017   %is_le = icmp sle <2 x i64> %vec, <i64 1, i64 1>
   2018   %min = select <2 x i1> %is_le, <2 x i64> %vec, <2 x i64> <i64 1, i64 1>
   2019   ; CHECK-DAG: mini_s.d [[R3:\$w[0-9]+]], [[R1]], 1
   2020   store <2 x i64> %min, <2 x i64>* %c
   2021   ; CHECK-DAG: st.d [[R3]], 0($4)
   2022 
   2023   ret void
   2024   ; CHECK: .size mini_s_eq_v2i64
   2025 }
   2026 
   2027 define void @mini_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   2028   ; CHECK: mini_u_eq_v16i8:
   2029   ; Unsigned min against a splat of 1 (icmp ule + select); immediate form expected.
   2030   %vec = load <16 x i8>, <16 x i8>* %a
   2031   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   2032   %is_le = icmp ule <16 x i8> %vec, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   2033   %min = select <16 x i1> %is_le, <16 x i8> %vec, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   2034   ; CHECK-DAG: mini_u.b [[R3:\$w[0-9]+]], [[R1]], 1
   2035   store <16 x i8> %min, <16 x i8>* %c
   2036   ; CHECK-DAG: st.b [[R3]], 0($4)
   2037 
   2038   ret void
   2039   ; CHECK: .size mini_u_eq_v16i8
   2040 }
   2041 
   2042 define void @mini_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   2043   ; CHECK: mini_u_eq_v8i16:
   2044   ; Unsigned min against a splat of 1 (icmp ule + select); immediate form expected.
   2045   %vec = load <8 x i16>, <8 x i16>* %a
   2046   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   2047   %is_le = icmp ule <8 x i16> %vec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   2048   %min = select <8 x i1> %is_le, <8 x i16> %vec, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   2049   ; CHECK-DAG: mini_u.h [[R3:\$w[0-9]+]], [[R1]], 1
   2050   store <8 x i16> %min, <8 x i16>* %c
   2051   ; CHECK-DAG: st.h [[R3]], 0($4)
   2052 
   2053   ret void
   2054   ; CHECK: .size mini_u_eq_v8i16
   2055 }
   2056 
   2057 define void @mini_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   2058   ; CHECK: mini_u_eq_v4i32:
   2059   ; Unsigned min against a splat of 1 (icmp ule + select); immediate form expected.
   2060   %vec = load <4 x i32>, <4 x i32>* %a
   2061   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   2062   %is_le = icmp ule <4 x i32> %vec, <i32 1, i32 1, i32 1, i32 1>
   2063   %min = select <4 x i1> %is_le, <4 x i32> %vec, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   2064   ; CHECK-DAG: mini_u.w [[R3:\$w[0-9]+]], [[R1]], 1
   2065   store <4 x i32> %min, <4 x i32>* %c
   2066   ; CHECK-DAG: st.w [[R3]], 0($4)
   2067 
   2068   ret void
   2069   ; CHECK: .size mini_u_eq_v4i32
   2070 }
   2071 
   2072 define void @mini_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   2073   ; CHECK: mini_u_eq_v2i64:
   2074   ; Unsigned min against a splat of 1 (icmp ule + select); immediate form expected.
   2075   %vec = load <2 x i64>, <2 x i64>* %a
   2076   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   2077   %is_le = icmp ule <2 x i64> %vec, <i64 1, i64 1>
   2078   %min = select <2 x i1> %is_le, <2 x i64> %vec, <2 x i64> <i64 1, i64 1>
   2079   ; CHECK-DAG: mini_u.d [[R3:\$w[0-9]+]], [[R1]], 1
   2080   store <2 x i64> %min, <2 x i64>* %c
   2081   ; CHECK-DAG: st.d [[R3]], 0($4)
   2082 
   2083   ret void
   2084   ; CHECK: .size mini_u_eq_v2i64
   2085 }
   2086