Home | History | Annotate | Download | only in msa
      1 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
      2 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
      3 
      4 define void @add_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
      5   ; CHECK-LABEL: add_v16i8:
      6   ; <16 x i8> add must select addv.b; the label above scopes the matches to this function.
      7   %1 = load <16 x i8>, <16 x i8>* %a
      8   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
      9   %2 = load <16 x i8>, <16 x i8>* %b
     10   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
     11   %3 = add <16 x i8> %1, %2
     12   ; CHECK-DAG: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
     13   store <16 x i8> %3, <16 x i8>* %c
     14   ; CHECK-DAG: st.b [[R3]], 0($4)
     15 
     16   ret void
     17   ; CHECK: .size add_v16i8
     18 }
     19 
     20 define void @add_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
     21   ; CHECK-LABEL: add_v8i16:
     22   ; <8 x i16> add must select addv.h; the label above scopes the matches to this function.
     23   %1 = load <8 x i16>, <8 x i16>* %a
     24   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
     25   %2 = load <8 x i16>, <8 x i16>* %b
     26   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
     27   %3 = add <8 x i16> %1, %2
     28   ; CHECK-DAG: addv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
     29   store <8 x i16> %3, <8 x i16>* %c
     30   ; CHECK-DAG: st.h [[R3]], 0($4)
     31 
     32   ret void
     33   ; CHECK: .size add_v8i16
     34 }
     35 
     36 define void @add_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
     37   ; CHECK-LABEL: add_v4i32:
     38   ; <4 x i32> add must select addv.w; the label above scopes the matches to this function.
     39   %1 = load <4 x i32>, <4 x i32>* %a
     40   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
     41   %2 = load <4 x i32>, <4 x i32>* %b
     42   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
     43   %3 = add <4 x i32> %1, %2
     44   ; CHECK-DAG: addv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
     45   store <4 x i32> %3, <4 x i32>* %c
     46   ; CHECK-DAG: st.w [[R3]], 0($4)
     47 
     48   ret void
     49   ; CHECK: .size add_v4i32
     50 }
     51 
     52 define void @add_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
     53   ; CHECK-LABEL: add_v2i64:
     54   ; <2 x i64> add must select addv.d; the label above scopes the matches to this function.
     55   %1 = load <2 x i64>, <2 x i64>* %a
     56   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
     57   %2 = load <2 x i64>, <2 x i64>* %b
     58   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
     59   %3 = add <2 x i64> %1, %2
     60   ; CHECK-DAG: addv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
     61   store <2 x i64> %3, <2 x i64>* %c
     62   ; CHECK-DAG: st.d [[R3]], 0($4)
     63 
     64   ret void
     65   ; CHECK: .size add_v2i64
     66 }
     67 
     68 define void @add_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
     69   ; CHECK-LABEL: add_v16i8_i:
     70   ; Adding a splat of 1 to <16 x i8> must select the immediate form addvi.b.
     71   %1 = load <16 x i8>, <16 x i8>* %a
     72   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
     73   %2 = add <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
     74                           i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
     75   ; CHECK-DAG: addvi.b [[R3:\$w[0-9]+]], [[R1]], 1
     76   store <16 x i8> %2, <16 x i8>* %c
     77   ; CHECK-DAG: st.b [[R3]], 0($4)
     78 
     79   ret void
     80   ; CHECK: .size add_v16i8_i
     81 }
     82 
     83 define void @add_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
     84   ; CHECK-LABEL: add_v8i16_i:
     85   ; Adding a splat of 1 to <8 x i16> must select the immediate form addvi.h.
     86   %1 = load <8 x i16>, <8 x i16>* %a
     87   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
     88   %2 = add <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
     89                           i16 1, i16 1, i16 1, i16 1>
     90   ; CHECK-DAG: addvi.h [[R3:\$w[0-9]+]], [[R1]], 1
     91   store <8 x i16> %2, <8 x i16>* %c
     92   ; CHECK-DAG: st.h [[R3]], 0($4)
     93 
     94   ret void
     95   ; CHECK: .size add_v8i16_i
     96 }
     97 
     98 define void @add_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
     99   ; CHECK-LABEL: add_v4i32_i:
    100   ; Adding a splat of 1 to <4 x i32> must select the immediate form addvi.w.
    101   %1 = load <4 x i32>, <4 x i32>* %a
    102   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    103   %2 = add <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
    104   ; CHECK-DAG: addvi.w [[R3:\$w[0-9]+]], [[R1]], 1
    105   store <4 x i32> %2, <4 x i32>* %c
    106   ; CHECK-DAG: st.w [[R3]], 0($4)
    107 
    108   ret void
    109   ; CHECK: .size add_v4i32_i
    110 }
    111 
    112 define void @add_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
    113   ; CHECK-LABEL: add_v2i64_i:
    114   ; Adding a splat of 1 to <2 x i64> must select the immediate form addvi.d.
    115   %1 = load <2 x i64>, <2 x i64>* %a
    116   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    117   %2 = add <2 x i64> %1, <i64 1, i64 1>
    118   ; CHECK-DAG: addvi.d [[R3:\$w[0-9]+]], [[R1]], 1
    119   store <2 x i64> %2, <2 x i64>* %c
    120   ; CHECK-DAG: st.d [[R3]], 0($4)
    121 
    122   ret void
    123   ; CHECK: .size add_v2i64_i
    124 }
    125 
    126 define void @sub_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
    127   ; CHECK-LABEL: sub_v16i8:
    128   ; <16 x i8> sub must select subv.b; the label above scopes the matches to this function.
    129   %1 = load <16 x i8>, <16 x i8>* %a
    130   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    131   %2 = load <16 x i8>, <16 x i8>* %b
    132   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    133   %3 = sub <16 x i8> %1, %2
    134   ; CHECK-DAG: subv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    135   store <16 x i8> %3, <16 x i8>* %c
    136   ; CHECK-DAG: st.b [[R3]], 0($4)
    137 
    138   ret void
    139   ; CHECK: .size sub_v16i8
    140 }
    141 
    142 define void @sub_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    143   ; CHECK-LABEL: sub_v8i16:
    144   ; <8 x i16> sub must select subv.h; the label above scopes the matches to this function.
    145   %1 = load <8 x i16>, <8 x i16>* %a
    146   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    147   %2 = load <8 x i16>, <8 x i16>* %b
    148   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    149   %3 = sub <8 x i16> %1, %2
    150   ; CHECK-DAG: subv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    151   store <8 x i16> %3, <8 x i16>* %c
    152   ; CHECK-DAG: st.h [[R3]], 0($4)
    153 
    154   ret void
    155   ; CHECK: .size sub_v8i16
    156 }
    157 
    158 define void @sub_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    159   ; CHECK-LABEL: sub_v4i32:
    160   ; <4 x i32> sub must select subv.w; the label above scopes the matches to this function.
    161   %1 = load <4 x i32>, <4 x i32>* %a
    162   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    163   %2 = load <4 x i32>, <4 x i32>* %b
    164   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    165   %3 = sub <4 x i32> %1, %2
    166   ; CHECK-DAG: subv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    167   store <4 x i32> %3, <4 x i32>* %c
    168   ; CHECK-DAG: st.w [[R3]], 0($4)
    169 
    170   ret void
    171   ; CHECK: .size sub_v4i32
    172 }
    173 
    174 define void @sub_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    175   ; CHECK-LABEL: sub_v2i64:
    176   ; <2 x i64> sub must select subv.d; the label above scopes the matches to this function.
    177   %1 = load <2 x i64>, <2 x i64>* %a
    178   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    179   %2 = load <2 x i64>, <2 x i64>* %b
    180   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    181   %3 = sub <2 x i64> %1, %2
    182   ; CHECK-DAG: subv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    183   store <2 x i64> %3, <2 x i64>* %c
    184   ; CHECK-DAG: st.d [[R3]], 0($4)
    185 
    186   ret void
    187   ; CHECK: .size sub_v2i64
    188 }
    189 
    190 define void @sub_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
    191   ; CHECK-LABEL: sub_v16i8_i:
    192   ; Subtracting a splat of 1 from <16 x i8> must select the immediate form subvi.b.
    193   %1 = load <16 x i8>, <16 x i8>* %a
    194   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    195   %2 = sub <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
    196                           i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    197   ; CHECK-DAG: subvi.b [[R3:\$w[0-9]+]], [[R1]], 1
    198   store <16 x i8> %2, <16 x i8>* %c
    199   ; CHECK-DAG: st.b [[R3]], 0($4)
    200 
    201   ret void
    202   ; CHECK: .size sub_v16i8_i
    203 }
    204 
    205 define void @sub_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
    206   ; CHECK-LABEL: sub_v8i16_i:
    207   ; Subtracting a splat of 1 from <8 x i16> must select the immediate form subvi.h.
    208   %1 = load <8 x i16>, <8 x i16>* %a
    209   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    210   %2 = sub <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
    211                           i16 1, i16 1, i16 1, i16 1>
    212   ; CHECK-DAG: subvi.h [[R3:\$w[0-9]+]], [[R1]], 1
    213   store <8 x i16> %2, <8 x i16>* %c
    214   ; CHECK-DAG: st.h [[R3]], 0($4)
    215 
    216   ret void
    217   ; CHECK: .size sub_v8i16_i
    218 }
    219 
    220 define void @sub_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
    221   ; CHECK-LABEL: sub_v4i32_i:
    222   ; Subtracting a splat of 1 from <4 x i32> must select the immediate form subvi.w.
    223   %1 = load <4 x i32>, <4 x i32>* %a
    224   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    225   %2 = sub <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
    226   ; CHECK-DAG: subvi.w [[R3:\$w[0-9]+]], [[R1]], 1
    227   store <4 x i32> %2, <4 x i32>* %c
    228   ; CHECK-DAG: st.w [[R3]], 0($4)
    229 
    230   ret void
    231   ; CHECK: .size sub_v4i32_i
    232 }
    233 
    234 define void @sub_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
    235   ; CHECK-LABEL: sub_v2i64_i:
    236   ; Subtracting a splat of 1 from <2 x i64> must select the immediate form subvi.d.
    237   %1 = load <2 x i64>, <2 x i64>* %a
    238   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    239   %2 = sub <2 x i64> %1, <i64 1, i64 1>
    240   ; CHECK-DAG: subvi.d [[R3:\$w[0-9]+]], [[R1]], 1
    241   store <2 x i64> %2, <2 x i64>* %c
    242   ; CHECK-DAG: st.d [[R3]], 0($4)
    243 
    244   ret void
    245   ; CHECK: .size sub_v2i64_i
    246 }
    247 
    248 define void @mul_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
    249   ; CHECK-LABEL: mul_v16i8:
    250   ; <16 x i8> mul must select mulv.b; the label above scopes the matches to this function.
    251   %1 = load <16 x i8>, <16 x i8>* %a
    252   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    253   %2 = load <16 x i8>, <16 x i8>* %b
    254   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    255   %3 = mul <16 x i8> %1, %2
    256   ; CHECK-DAG: mulv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    257   store <16 x i8> %3, <16 x i8>* %c
    258   ; CHECK-DAG: st.b [[R3]], 0($4)
    259 
    260   ret void
    261   ; CHECK: .size mul_v16i8
    262 }
    263 
    264 define void @mul_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    265   ; CHECK-LABEL: mul_v8i16:
    266   ; <8 x i16> mul must select mulv.h; the label above scopes the matches to this function.
    267   %1 = load <8 x i16>, <8 x i16>* %a
    268   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    269   %2 = load <8 x i16>, <8 x i16>* %b
    270   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    271   %3 = mul <8 x i16> %1, %2
    272   ; CHECK-DAG: mulv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    273   store <8 x i16> %3, <8 x i16>* %c
    274   ; CHECK-DAG: st.h [[R3]], 0($4)
    275 
    276   ret void
    277   ; CHECK: .size mul_v8i16
    278 }
    279 
    280 define void @mul_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    281   ; CHECK-LABEL: mul_v4i32:
    282   ; <4 x i32> mul must select mulv.w; the label above scopes the matches to this function.
    283   %1 = load <4 x i32>, <4 x i32>* %a
    284   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    285   %2 = load <4 x i32>, <4 x i32>* %b
    286   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    287   %3 = mul <4 x i32> %1, %2
    288   ; CHECK-DAG: mulv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    289   store <4 x i32> %3, <4 x i32>* %c
    290   ; CHECK-DAG: st.w [[R3]], 0($4)
    291 
    292   ret void
    293   ; CHECK: .size mul_v4i32
    294 }
    295 
    296 define void @mul_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    297   ; CHECK-LABEL: mul_v2i64:
    298   ; <2 x i64> mul must select mulv.d; the label above scopes the matches to this function.
    299   %1 = load <2 x i64>, <2 x i64>* %a
    300   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    301   %2 = load <2 x i64>, <2 x i64>* %b
    302   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    303   %3 = mul <2 x i64> %1, %2
    304   ; CHECK-DAG: mulv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    305   store <2 x i64> %3, <2 x i64>* %c
    306   ; CHECK-DAG: st.d [[R3]], 0($4)
    307 
    308   ret void
    309   ; CHECK: .size mul_v2i64
    310 }
    311 
    312 define void @maddv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
    313                          <16 x i8>* %c) nounwind {
    314   ; CHECK-LABEL: maddv_v16i8:
    315   ; (%b * %c) + %a on <16 x i8> must fuse into maddv.b, accumulating into the register loaded from %a.
    316   %1 = load <16 x i8>, <16 x i8>* %a
    317   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    318   %2 = load <16 x i8>, <16 x i8>* %b
    319   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    320   %3 = load <16 x i8>, <16 x i8>* %c
    321   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
    322   %4 = mul <16 x i8> %2, %3
    323   %5 = add <16 x i8> %4, %1
    324   ; CHECK-DAG: maddv.b [[R1]], [[R2]], [[R3]]
    325   store <16 x i8> %5, <16 x i8>* %d
    326   ; CHECK-DAG: st.b [[R1]], 0($4)
    327 
    328   ret void
    329   ; CHECK: .size maddv_v16i8
    330 }
    331 
    332 define void @maddv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
    333                          <8 x i16>* %c) nounwind {
    334   ; CHECK-LABEL: maddv_v8i16:
    335   ; (%b * %c) + %a on <8 x i16> must fuse into maddv.h, accumulating into the register loaded from %a.
    336   %1 = load <8 x i16>, <8 x i16>* %a
    337   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    338   %2 = load <8 x i16>, <8 x i16>* %b
    339   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    340   %3 = load <8 x i16>, <8 x i16>* %c
    341   ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
    342   %4 = mul <8 x i16> %2, %3
    343   %5 = add <8 x i16> %4, %1
    344   ; CHECK-DAG: maddv.h [[R1]], [[R2]], [[R3]]
    345   store <8 x i16> %5, <8 x i16>* %d
    346   ; CHECK-DAG: st.h [[R1]], 0($4)
    347 
    348   ret void
    349   ; CHECK: .size maddv_v8i16
    350 }
    351 
    352 define void @maddv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
    353                          <4 x i32>* %c) nounwind {
    354   ; CHECK-LABEL: maddv_v4i32:
    355   ; (%b * %c) + %a on <4 x i32> must fuse into maddv.w, accumulating into the register loaded from %a.
    356   %1 = load <4 x i32>, <4 x i32>* %a
    357   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    358   %2 = load <4 x i32>, <4 x i32>* %b
    359   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    360   %3 = load <4 x i32>, <4 x i32>* %c
    361   ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
    362   %4 = mul <4 x i32> %2, %3
    363   %5 = add <4 x i32> %4, %1
    364   ; CHECK-DAG: maddv.w [[R1]], [[R2]], [[R3]]
    365   store <4 x i32> %5, <4 x i32>* %d
    366   ; CHECK-DAG: st.w [[R1]], 0($4)
    367 
    368   ret void
    369   ; CHECK: .size maddv_v4i32
    370 }
    371 
    372 define void @maddv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
    373                          <2 x i64>* %c) nounwind {
    374   ; CHECK-LABEL: maddv_v2i64:
    375   ; (%b * %c) + %a on <2 x i64> must fuse into maddv.d, accumulating into the register loaded from %a.
    376   %1 = load <2 x i64>, <2 x i64>* %a
    377   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    378   %2 = load <2 x i64>, <2 x i64>* %b
    379   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    380   %3 = load <2 x i64>, <2 x i64>* %c
    381   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
    382   %4 = mul <2 x i64> %2, %3
    383   %5 = add <2 x i64> %4, %1
    384   ; CHECK-DAG: maddv.d [[R1]], [[R2]], [[R3]]
    385   store <2 x i64> %5, <2 x i64>* %d
    386   ; CHECK-DAG: st.d [[R1]], 0($4)
    387 
    388   ret void
    389   ; CHECK: .size maddv_v2i64
    390 }
    391 
    392 define void @msubv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
    393                          <16 x i8>* %c) nounwind {
    394   ; CHECK-LABEL: msubv_v16i8:
    395   ; %a - (%b * %c) on <16 x i8> must fuse into msubv.b, accumulating into the register loaded from %a.
    396   %1 = load <16 x i8>, <16 x i8>* %a
    397   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    398   %2 = load <16 x i8>, <16 x i8>* %b
    399   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    400   %3 = load <16 x i8>, <16 x i8>* %c
    401   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
    402   %4 = mul <16 x i8> %2, %3
    403   %5 = sub <16 x i8> %1, %4
    404   ; CHECK-DAG: msubv.b [[R1]], [[R2]], [[R3]]
    405   store <16 x i8> %5, <16 x i8>* %d
    406   ; CHECK-DAG: st.b [[R1]], 0($4)
    407 
    408   ret void
    409   ; CHECK: .size msubv_v16i8
    410 }
    411 
    412 define void @msubv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
    413                          <8 x i16>* %c) nounwind {
    414   ; CHECK-LABEL: msubv_v8i16:
    415   ; %a - (%b * %c) on <8 x i16> must fuse into msubv.h, accumulating into the register loaded from %a.
    416   %1 = load <8 x i16>, <8 x i16>* %a
    417   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    418   %2 = load <8 x i16>, <8 x i16>* %b
    419   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    420   %3 = load <8 x i16>, <8 x i16>* %c
    421   ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
    422   %4 = mul <8 x i16> %2, %3
    423   %5 = sub <8 x i16> %1, %4
    424   ; CHECK-DAG: msubv.h [[R1]], [[R2]], [[R3]]
    425   store <8 x i16> %5, <8 x i16>* %d
    426   ; CHECK-DAG: st.h [[R1]], 0($4)
    427 
    428   ret void
    429   ; CHECK: .size msubv_v8i16
    430 }
    431 
    432 define void @msubv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
    433                          <4 x i32>* %c) nounwind {
    434   ; CHECK-LABEL: msubv_v4i32:
    435   ; %a - (%b * %c) on <4 x i32> must fuse into msubv.w, accumulating into the register loaded from %a.
    436   %1 = load <4 x i32>, <4 x i32>* %a
    437   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    438   %2 = load <4 x i32>, <4 x i32>* %b
    439   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    440   %3 = load <4 x i32>, <4 x i32>* %c
    441   ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
    442   %4 = mul <4 x i32> %2, %3
    443   %5 = sub <4 x i32> %1, %4
    444   ; CHECK-DAG: msubv.w [[R1]], [[R2]], [[R3]]
    445   store <4 x i32> %5, <4 x i32>* %d
    446   ; CHECK-DAG: st.w [[R1]], 0($4)
    447 
    448   ret void
    449   ; CHECK: .size msubv_v4i32
    450 }
    451 
    452 define void @msubv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
    453                          <2 x i64>* %c) nounwind {
    454   ; CHECK-LABEL: msubv_v2i64:
    455   ; %a - (%b * %c) on <2 x i64> must fuse into msubv.d, accumulating into the register loaded from %a.
    456   %1 = load <2 x i64>, <2 x i64>* %a
    457   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    458   %2 = load <2 x i64>, <2 x i64>* %b
    459   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    460   %3 = load <2 x i64>, <2 x i64>* %c
    461   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
    462   %4 = mul <2 x i64> %2, %3
    463   %5 = sub <2 x i64> %1, %4
    464   ; CHECK-DAG: msubv.d [[R1]], [[R2]], [[R3]]
    465   store <2 x i64> %5, <2 x i64>* %d
    466   ; CHECK-DAG: st.d [[R1]], 0($4)
    467 
    468   ret void
    469   ; CHECK: .size msubv_v2i64
    470 }
    471 
    472 define void @div_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
    473   ; CHECK-LABEL: div_s_v16i8:
    474   ; <16 x i8> sdiv must select div_s.b; the label above scopes the matches to this function.
    475   %1 = load <16 x i8>, <16 x i8>* %a
    476   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    477   %2 = load <16 x i8>, <16 x i8>* %b
    478   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    479   %3 = sdiv <16 x i8> %1, %2
    480   ; CHECK-DAG: div_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    481   store <16 x i8> %3, <16 x i8>* %c
    482   ; CHECK-DAG: st.b [[R3]], 0($4)
    483 
    484   ret void
    485   ; CHECK: .size div_s_v16i8
    486 }
    487 
    488 define void @div_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    489   ; CHECK-LABEL: div_s_v8i16:
    490   ; <8 x i16> sdiv must select div_s.h; the label above scopes the matches to this function.
    491   %1 = load <8 x i16>, <8 x i16>* %a
    492   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    493   %2 = load <8 x i16>, <8 x i16>* %b
    494   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    495   %3 = sdiv <8 x i16> %1, %2
    496   ; CHECK-DAG: div_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    497   store <8 x i16> %3, <8 x i16>* %c
    498   ; CHECK-DAG: st.h [[R3]], 0($4)
    499 
    500   ret void
    501   ; CHECK: .size div_s_v8i16
    502 }
    503 
    504 define void @div_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    505   ; CHECK-LABEL: div_s_v4i32:
    506   ; <4 x i32> sdiv must select div_s.w; the label above scopes the matches to this function.
    507   %1 = load <4 x i32>, <4 x i32>* %a
    508   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    509   %2 = load <4 x i32>, <4 x i32>* %b
    510   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    511   %3 = sdiv <4 x i32> %1, %2
    512   ; CHECK-DAG: div_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    513   store <4 x i32> %3, <4 x i32>* %c
    514   ; CHECK-DAG: st.w [[R3]], 0($4)
    515 
    516   ret void
    517   ; CHECK: .size div_s_v4i32
    518 }
    519 
    520 define void @div_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    521   ; CHECK-LABEL: div_s_v2i64:
    522   ; <2 x i64> sdiv must select div_s.d; the label above scopes the matches to this function.
    523   %1 = load <2 x i64>, <2 x i64>* %a
    524   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    525   %2 = load <2 x i64>, <2 x i64>* %b
    526   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    527   %3 = sdiv <2 x i64> %1, %2
    528   ; CHECK-DAG: div_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    529   store <2 x i64> %3, <2 x i64>* %c
    530   ; CHECK-DAG: st.d [[R3]], 0($4)
    531 
    532   ret void
    533   ; CHECK: .size div_s_v2i64
    534 }
    535 
    536 define void @div_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
    537   ; CHECK-LABEL: div_u_v16i8:
    538   ; <16 x i8> udiv must select div_u.b; the label above scopes the matches to this function.
    539   %1 = load <16 x i8>, <16 x i8>* %a
    540   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    541   %2 = load <16 x i8>, <16 x i8>* %b
    542   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    543   %3 = udiv <16 x i8> %1, %2
    544   ; CHECK-DAG: div_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    545   store <16 x i8> %3, <16 x i8>* %c
    546   ; CHECK-DAG: st.b [[R3]], 0($4)
    547 
    548   ret void
    549   ; CHECK: .size div_u_v16i8
    550 }
    551 
    552 define void @div_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    553   ; CHECK-LABEL: div_u_v8i16:
    554   ; <8 x i16> udiv must select div_u.h; the label above scopes the matches to this function.
    555   %1 = load <8 x i16>, <8 x i16>* %a
    556   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    557   %2 = load <8 x i16>, <8 x i16>* %b
    558   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    559   %3 = udiv <8 x i16> %1, %2
    560   ; CHECK-DAG: div_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    561   store <8 x i16> %3, <8 x i16>* %c
    562   ; CHECK-DAG: st.h [[R3]], 0($4)
    563 
    564   ret void
    565   ; CHECK: .size div_u_v8i16
    566 }
    567 
    568 define void @div_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    569   ; CHECK-LABEL: div_u_v4i32:
    570   ; <4 x i32> udiv must select div_u.w; the label above scopes the matches to this function.
    571   %1 = load <4 x i32>, <4 x i32>* %a
    572   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    573   %2 = load <4 x i32>, <4 x i32>* %b
    574   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    575   %3 = udiv <4 x i32> %1, %2
    576   ; CHECK-DAG: div_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    577   store <4 x i32> %3, <4 x i32>* %c
    578   ; CHECK-DAG: st.w [[R3]], 0($4)
    579 
    580   ret void
    581   ; CHECK: .size div_u_v4i32
    582 }
    583 
    584 define void @div_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    585   ; CHECK-LABEL: div_u_v2i64:
    586   ; <2 x i64> udiv must select div_u.d; the label above scopes the matches to this function.
    587   %1 = load <2 x i64>, <2 x i64>* %a
    588   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    589   %2 = load <2 x i64>, <2 x i64>* %b
    590   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    591   %3 = udiv <2 x i64> %1, %2
    592   ; CHECK-DAG: div_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    593   store <2 x i64> %3, <2 x i64>* %c
    594   ; CHECK-DAG: st.d [[R3]], 0($4)
    595 
    596   ret void
    597   ; CHECK: .size div_u_v2i64
    598 }
    599 
    600 define void @mod_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
    601   ; CHECK-LABEL: mod_s_v16i8:
    602   ; <16 x i8> srem must select mod_s.b; the label above scopes the matches to this function.
    603   %1 = load <16 x i8>, <16 x i8>* %a
    604   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    605   %2 = load <16 x i8>, <16 x i8>* %b
    606   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    607   %3 = srem <16 x i8> %1, %2
    608   ; CHECK-DAG: mod_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    609   store <16 x i8> %3, <16 x i8>* %c
    610   ; CHECK-DAG: st.b [[R3]], 0($4)
    611 
    612   ret void
    613   ; CHECK: .size mod_s_v16i8
    614 }
    615 
    616 define void @mod_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    617   ; CHECK-LABEL: mod_s_v8i16:
    618   ; <8 x i16> srem must select mod_s.h; the label above scopes the matches to this function.
    619   %1 = load <8 x i16>, <8 x i16>* %a
    620   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    621   %2 = load <8 x i16>, <8 x i16>* %b
    622   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    623   %3 = srem <8 x i16> %1, %2
    624   ; CHECK-DAG: mod_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    625   store <8 x i16> %3, <8 x i16>* %c
    626   ; CHECK-DAG: st.h [[R3]], 0($4)
    627 
    628   ret void
    629   ; CHECK: .size mod_s_v8i16
    630 }
    631 
    632 define void @mod_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    633   ; CHECK-LABEL: mod_s_v4i32:
    634   ; <4 x i32> srem must select mod_s.w; the label above scopes the matches to this function.
    635   %1 = load <4 x i32>, <4 x i32>* %a
    636   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    637   %2 = load <4 x i32>, <4 x i32>* %b
    638   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    639   %3 = srem <4 x i32> %1, %2
    640   ; CHECK-DAG: mod_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    641   store <4 x i32> %3, <4 x i32>* %c
    642   ; CHECK-DAG: st.w [[R3]], 0($4)
    643 
    644   ret void
    645   ; CHECK: .size mod_s_v4i32
    646 }
    647 
    648 define void @mod_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    649   ; CHECK-LABEL: mod_s_v2i64:
    650   ; <2 x i64> srem must select mod_s.d; the label above scopes the matches to this function.
    651   %1 = load <2 x i64>, <2 x i64>* %a
    652   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    653   %2 = load <2 x i64>, <2 x i64>* %b
    654   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    655   %3 = srem <2 x i64> %1, %2
    656   ; CHECK-DAG: mod_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    657   store <2 x i64> %3, <2 x i64>* %c
    658   ; CHECK-DAG: st.d [[R3]], 0($4)
    659 
    660   ret void
    661   ; CHECK: .size mod_s_v2i64
    662 }
    663 
    664 define void @mod_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
    665   ; CHECK-LABEL: mod_u_v16i8:
    666   ; <16 x i8> urem must select mod_u.b; the label above scopes the matches to this function.
    667   %1 = load <16 x i8>, <16 x i8>* %a
    668   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    669   %2 = load <16 x i8>, <16 x i8>* %b
    670   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    671   %3 = urem <16 x i8> %1, %2
    672   ; CHECK-DAG: mod_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    673   store <16 x i8> %3, <16 x i8>* %c
    674   ; CHECK-DAG: st.b [[R3]], 0($4)
    675 
    676   ret void
    677   ; CHECK: .size mod_u_v16i8
    678 }
    679 
    680 define void @mod_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    681   ; CHECK-LABEL: mod_u_v8i16:
    682   ; <8 x i16> urem must select mod_u.h; the label above scopes the matches to this function.
    683   %1 = load <8 x i16>, <8 x i16>* %a
    684   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    685   %2 = load <8 x i16>, <8 x i16>* %b
    686   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    687   %3 = urem <8 x i16> %1, %2
    688   ; CHECK-DAG: mod_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    689   store <8 x i16> %3, <8 x i16>* %c
    690   ; CHECK-DAG: st.h [[R3]], 0($4)
    691 
    692   ret void
    693   ; CHECK: .size mod_u_v8i16
    694 }
    695 
    696 define void @mod_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    697   ; CHECK-LABEL: mod_u_v4i32:
    698   ; <4 x i32> urem must select mod_u.w; the label above scopes the matches to this function.
    699   %1 = load <4 x i32>, <4 x i32>* %a
    700   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    701   %2 = load <4 x i32>, <4 x i32>* %b
    702   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    703   %3 = urem <4 x i32> %1, %2
    704   ; CHECK-DAG: mod_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    705   store <4 x i32> %3, <4 x i32>* %c
    706   ; CHECK-DAG: st.w [[R3]], 0($4)
    707 
    708   ret void
    709   ; CHECK: .size mod_u_v4i32
    710 }
    711 
    712 define void @mod_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    713   ; CHECK-LABEL: mod_u_v2i64:
    714   ; <2 x i64> urem must select mod_u.d; the label above scopes the matches to this function.
    715   %1 = load <2 x i64>, <2 x i64>* %a
    716   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    717   %2 = load <2 x i64>, <2 x i64>* %b
    718   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    719   %3 = urem <2 x i64> %1, %2
    720   ; CHECK-DAG: mod_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    721   store <2 x i64> %3, <2 x i64>* %c
    722   ; CHECK-DAG: st.d [[R3]], 0($4)
    723 
    724   ret void
    725   ; CHECK: .size mod_u_v2i64
    726 }
    727