; Test the bitcast operation for big-endian and little-endian.

; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=BIGENDIAN %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=LITENDIAN %s
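
; Note on the expected big-endian shuffles: a vector bitcast is a no-op on
; little-endian targets, but on big-endian targets the two interpretations
; place their elements in different byte positions, so the backend has to
; insert shf instructions to reorder them. The shf immediate is a selector,
; two bits per element, applied to each group of four elements:
; 177 (0b10110001) picks [1,0,3,2] and swaps adjacent elements, while
; 27 (0b00011011) picks [3,2,1,0] and reverses each group of four.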

define void @v16i8_to_v16i8(<16 x i8>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v16i8:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v16i8_to_v16i8

; BIGENDIAN: v16i8_to_v16i8:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.b [[R3]],
; BIGENDIAN: .size v16i8_to_v16i8

define void @v16i8_to_v8i16(<16 x i8>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v8i16:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v16i8_to_v8i16

; BIGENDIAN: v16i8_to_v8i16:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v16i8_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
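; The combine turns (store (bitcast <16 x i8> X to <8 x half>)) into a
; direct store of X, which is why the checks below expect the addv.b result
; to be stored with no shf correction, even on big-endian.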
define void @v16i8_to_v8f16(<16 x i8>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v8f16:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.b [[R2]],
; LITENDIAN: .size v16i8_to_v8f16

; BIGENDIAN: v16i8_to_v8f16:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.b [[R2]],
; BIGENDIAN: .size v16i8_to_v8f16

define void @v16i8_to_v4i32(<16 x i8>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v4i32:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v16i8_to_v4i32

; BIGENDIAN: v16i8_to_v4i32:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v16i8_to_v4i32

define void @v16i8_to_v4f32(<16 x i8>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v4f32:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v16i8_to_v4f32

; BIGENDIAN: v16i8_to_v4f32:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v16i8_to_v4f32
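
; Note: bitcasting to v2i64 on big-endian targets requires the bytes of each
; doubleword to be reversed. There is no shf.d, so the expected code below
; does it in two steps: shf.b 27 reverses the bytes within each word, then
; shf.w 177 swaps the two words of each doubleword.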
define void @v16i8_to_v2i64(<16 x i8>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v2i64:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v16i8_to_v2i64

; BIGENDIAN: v16i8_to_v2i64:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v16i8_to_v2i64

define void @v16i8_to_v2f64(<16 x i8>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v2f64:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v16i8_to_v2f64

; BIGENDIAN: v16i8_to_v2f64:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v16i8_to_v2f64

define void @v8i16_to_v16i8(<8 x i16>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v16i8:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v8i16_to_v16i8

; BIGENDIAN: v8i16_to_v16i8:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v8i16_to_v16i8

define void @v8i16_to_v8i16(<8 x i16>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v8i16:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v8i16_to_v8i16

; BIGENDIAN: v8i16_to_v8i16:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.h [[R3]],
; BIGENDIAN: .size v8i16_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8i16_to_v8f16(<8 x i16>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v8f16:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.h [[R2]],
; LITENDIAN: .size v8i16_to_v8f16

; BIGENDIAN: v8i16_to_v8f16:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.h [[R2]],
; BIGENDIAN: .size v8i16_to_v8f16

define void @v8i16_to_v4i32(<8 x i16>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v4i32:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v8i16_to_v4i32

; BIGENDIAN: v8i16_to_v4i32:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v8i16_to_v4i32

define void @v8i16_to_v4f32(<8 x i16>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v4f32:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v8i16_to_v4f32

; BIGENDIAN: v8i16_to_v4f32:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v8i16_to_v4f32

define void @v8i16_to_v2i64(<8 x i16>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v2i64:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v8i16_to_v2i64

; BIGENDIAN: v8i16_to_v2i64:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v8i16_to_v2i64

define void @v8i16_to_v2f64(<8 x i16>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v2f64:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v8i16_to_v2f64

; BIGENDIAN: v8i16_to_v2f64:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v8i16_to_v2f64

;----
; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v16i8(<8 x half>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <16 x i8>
  %2 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %1, <16 x i8> %1)
  store <16 x i8> %2, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v16i8:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v8f16_to_v16i8

; BIGENDIAN: v8f16_to_v16i8:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R1]], 177
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v8f16_to_v16i8

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v8i16(<8 x half>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <8 x i16>
  %2 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %1, <8 x i16> %1)
  store <8 x i16> %2, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v8i16:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.h [[R2]],
; LITENDIAN: .size v8f16_to_v8i16

; BIGENDIAN: v8f16_to_v8i16:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.h [[R2]],
; BIGENDIAN: .size v8f16_to_v8i16

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v8f16(<8 x half>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <8 x half>
  store <8 x half> %1, <8 x half>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v8f16:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: st.h [[R1]],
; LITENDIAN: .size v8f16_to_v8f16

; BIGENDIAN: v8f16_to_v8f16:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: st.h [[R1]],
; BIGENDIAN: .size v8f16_to_v8f16

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v4i32(<8 x half>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <4 x i32>
  %2 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %1, <4 x i32> %1)
  store <4 x i32> %2, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v4i32:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v8f16_to_v4i32

; BIGENDIAN: v8f16_to_v4i32:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v8f16_to_v4i32

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v4f32(<8 x half>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <4 x float>
  %2 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %1, <4 x float> %1)
  store <4 x float> %2, <4 x float>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v4f32:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v8f16_to_v4f32

; BIGENDIAN: v8f16_to_v4f32:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v8f16_to_v4f32

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v2i64(<8 x half>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <2 x i64>
  %2 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %1, <2 x i64> %1)
  store <2 x i64> %2, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v2i64:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v8f16_to_v2i64

; BIGENDIAN: v8f16_to_v2i64:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v8f16_to_v2i64

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v2f64(<8 x half>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <2 x double>
  %2 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %1, <2 x double> %1)
  store <2 x double> %2, <2 x double>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v2f64:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v8f16_to_v2f64

; BIGENDIAN: v8f16_to_v2f64:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v8f16_to_v2f64
;----

define void @v4i32_to_v16i8(<4 x i32>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v16i8:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v4i32_to_v16i8

; BIGENDIAN: v4i32_to_v16i8:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v4i32_to_v16i8

define void @v4i32_to_v8i16(<4 x i32>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v8i16:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v4i32_to_v8i16

; BIGENDIAN: v4i32_to_v8i16:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v4i32_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v4i32_to_v8f16(<4 x i32>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v8f16:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v4i32_to_v8f16

; BIGENDIAN: v4i32_to_v8f16:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.w [[R2]],
; BIGENDIAN: .size v4i32_to_v8f16

define void @v4i32_to_v4i32(<4 x i32>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v4i32:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v4i32_to_v4i32

; BIGENDIAN: v4i32_to_v4i32:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v4i32_to_v4i32

define void @v4i32_to_v4f32(<4 x i32>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v4f32:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v4i32_to_v4f32

; BIGENDIAN: v4i32_to_v4f32:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v4i32_to_v4f32

define void @v4i32_to_v2i64(<4 x i32>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v2i64:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v4i32_to_v2i64

; BIGENDIAN: v4i32_to_v2i64:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v4i32_to_v2i64

define void @v4i32_to_v2f64(<4 x i32>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v2f64:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v4i32_to_v2f64

; BIGENDIAN: v4i32_to_v2f64:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v4i32_to_v2f64

define void @v4f32_to_v16i8(<4 x float>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v16i8:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v4f32_to_v16i8

; BIGENDIAN: v4f32_to_v16i8:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v4f32_to_v16i8

define void @v4f32_to_v8i16(<4 x float>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v8i16:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v4f32_to_v8i16

; BIGENDIAN: v4f32_to_v8i16:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v4f32_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v4f32_to_v8f16(<4 x float>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v8f16:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v4f32_to_v8f16

; BIGENDIAN: v4f32_to_v8f16:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.w [[R2]],
; BIGENDIAN: .size v4f32_to_v8f16

define void @v4f32_to_v4i32(<4 x float>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v4i32:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v4f32_to_v4i32

; BIGENDIAN: v4f32_to_v4i32:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v4f32_to_v4i32

define void @v4f32_to_v4f32(<4 x float>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v4f32:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v4f32_to_v4f32

; BIGENDIAN: v4f32_to_v4f32:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v4f32_to_v4f32

define void @v4f32_to_v2i64(<4 x float>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v2i64:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v4f32_to_v2i64

; BIGENDIAN: v4f32_to_v2i64:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v4f32_to_v2i64

define void @v4f32_to_v2f64(<4 x float>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v2f64:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v4f32_to_v2f64

; BIGENDIAN: v4f32_to_v2f64:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v4f32_to_v2f64

define void @v2i64_to_v16i8(<2 x i64>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v16i8:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v2i64_to_v16i8

; BIGENDIAN: v2i64_to_v16i8:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v2i64_to_v16i8

define void @v2i64_to_v8i16(<2 x i64>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v8i16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v2i64_to_v8i16

; BIGENDIAN: v2i64_to_v8i16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v2i64_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v2i64_to_v8f16(<2 x i64>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v8f16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v2i64_to_v8f16

; BIGENDIAN: v2i64_to_v8f16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.d [[R2]],
; BIGENDIAN: .size v2i64_to_v8f16

define void @v2i64_to_v4i32(<2 x i64>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v4i32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2i64_to_v4i32

; BIGENDIAN: v2i64_to_v4i32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2i64_to_v4i32

define void @v2i64_to_v4f32(<2 x i64>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v4f32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2i64_to_v4f32

; BIGENDIAN: v2i64_to_v4f32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2i64_to_v4f32

define void @v2i64_to_v2i64(<2 x i64>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v2i64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2i64_to_v2i64

; BIGENDIAN: v2i64_to_v2i64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2i64_to_v2i64

define void @v2i64_to_v2f64(<2 x i64>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v2f64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2i64_to_v2f64

; BIGENDIAN: v2i64_to_v2f64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2i64_to_v2f64

define void @v2f64_to_v16i8(<2 x double>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v16i8:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v2f64_to_v16i8

; BIGENDIAN: v2f64_to_v16i8:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v2f64_to_v16i8

define void @v2f64_to_v8i16(<2 x double>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v8i16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v2f64_to_v8i16

; BIGENDIAN: v2f64_to_v8i16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v2f64_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v2f64_to_v8f16(<2 x double>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v8f16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v2f64_to_v8f16

; BIGENDIAN: v2f64_to_v8f16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.d [[R2]],
; BIGENDIAN: .size v2f64_to_v8f16

define void @v2f64_to_v4i32(<2 x double>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v4i32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2f64_to_v4i32

; BIGENDIAN: v2f64_to_v4i32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2f64_to_v4i32

define void @v2f64_to_v4f32(<2 x double>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v4f32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2f64_to_v4f32

; BIGENDIAN: v2f64_to_v4f32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2f64_to_v4f32

define void @v2f64_to_v2i64(<2 x double>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v2i64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2f64_to_v2i64

; BIGENDIAN: v2f64_to_v2i64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2f64_to_v2i64

define void @v2f64_to_v2f64(<2 x double>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v2f64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2f64_to_v2f64

; BIGENDIAN: v2f64_to_v2f64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2f64_to_v2f64
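
; Note: the functions above use these MSA intrinsics rather than generic IR
; arithmetic so that each value is produced and consumed by an MSA
; instruction on both sides of the bitcast, keeping the cast between two
; register values instead of letting it fold into the load or store
; (except in the v8f16 cases, where no such operation exists).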
declare <16 x i8> @llvm.mips.addv.b(<16 x i8>, <16 x i8>) nounwind
declare <8 x i16> @llvm.mips.addv.h(<8 x i16>, <8 x i16>) nounwind
declare <4 x i32> @llvm.mips.addv.w(<4 x i32>, <4 x i32>) nounwind
declare <2 x i64> @llvm.mips.addv.d(<2 x i64>, <2 x i64>) nounwind
declare <4 x float> @llvm.mips.fadd.w(<4 x float>, <4 x float>) nounwind
declare <2 x double> @llvm.mips.fadd.d(<2 x double>, <2 x double>) nounwind