; Test the bitcast operation for big-endian and little-endian.

; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=BIGENDIAN %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=LITENDIAN %s

define void @v16i8_to_v16i8(<16 x i8>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>, <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v16i8:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v16i8_to_v16i8

; BIGENDIAN: v16i8_to_v16i8:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.b [[R3]],
; BIGENDIAN: .size v16i8_to_v16i8

define void @v16i8_to_v8i16(<16 x i8>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>, <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v8i16:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v16i8_to_v8i16

; BIGENDIAN: v16i8_to_v8i16:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v16i8_to_v8i16

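; A rough guide to the shf immediates in the BIGENDIAN checks below (assuming
; the usual MSA encoding of four 2-bit element selectors per 8-bit immediate):
; 177 (0b10110001) selects elements 1,0,3,2 and so swaps adjacent elements,
; while 27 (0b00011011) selects 3,2,1,0 and reverses each group of four.
; These are the reorderings needed to keep a bitcast bit-pattern-preserving
; when the element size changes on a big-endian target.
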
; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v16i8_to_v8f16(<16 x i8>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>, <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v8f16:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.b [[R2]],
; LITENDIAN: .size v16i8_to_v8f16

; BIGENDIAN: v16i8_to_v8f16:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.b [[R2]],
; BIGENDIAN: .size v16i8_to_v8f16

define void @v16i8_to_v4i32(<16 x i8>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>, <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v4i32:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v16i8_to_v4i32

; BIGENDIAN: v16i8_to_v4i32:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v16i8_to_v4i32

define void @v16i8_to_v4f32(<16 x i8>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>, <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v4f32:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v16i8_to_v4f32

; BIGENDIAN: v16i8_to_v4f32:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v16i8_to_v4f32

define void @v16i8_to_v2i64(<16 x i8>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>, <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v2i64:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v16i8_to_v2i64

; BIGENDIAN: v16i8_to_v2i64:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v16i8_to_v2i64

define void @v16i8_to_v2f64(<16 x i8>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>, <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v2f64:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v16i8_to_v2f64

; BIGENDIAN: v16i8_to_v2f64:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v16i8_to_v2f64

define void @v8i16_to_v16i8(<8 x i16>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>, <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v16i8:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v8i16_to_v16i8

; BIGENDIAN: v8i16_to_v16i8:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v8i16_to_v16i8

define void @v8i16_to_v8i16(<8 x i16>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>, <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v8i16:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v8i16_to_v8i16

; BIGENDIAN: v8i16_to_v8i16:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.h [[R3]],
; BIGENDIAN: .size v8i16_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8i16_to_v8f16(<8 x i16>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>, <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v8f16:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.h [[R2]],
; LITENDIAN: .size v8i16_to_v8f16

; BIGENDIAN: v8i16_to_v8f16:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.h [[R2]],
; BIGENDIAN: .size v8i16_to_v8f16

define void @v8i16_to_v4i32(<8 x i16>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>, <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v4i32:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v8i16_to_v4i32

; BIGENDIAN: v8i16_to_v4i32:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v8i16_to_v4i32

define void @v8i16_to_v4f32(<8 x i16>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>, <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v4f32:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v8i16_to_v4f32

; BIGENDIAN: v8i16_to_v4f32:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v8i16_to_v4f32

define void @v8i16_to_v2i64(<8 x i16>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>, <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v2i64:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v8i16_to_v2i64

; BIGENDIAN: v8i16_to_v2i64:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v8i16_to_v2i64

define void @v8i16_to_v2f64(<8 x i16>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>, <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v2f64:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v8i16_to_v2f64

; BIGENDIAN: v8i16_to_v2f64:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v8i16_to_v2f64

;----
; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v16i8(<8 x half>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <16 x i8>
  %2 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %1, <16 x i8> %1)
  store <16 x i8> %2, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v16i8:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v8f16_to_v16i8

; BIGENDIAN: v8f16_to_v16i8:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R1]], 177
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v8f16_to_v16i8

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v8i16(<8 x half>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <8 x i16>
  %2 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %1, <8 x i16> %1)
  store <8 x i16> %2, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v8i16:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.h [[R2]],
; LITENDIAN: .size v8f16_to_v8i16

; BIGENDIAN: v8f16_to_v8i16:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.h [[R2]],
; BIGENDIAN: .size v8f16_to_v8i16

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v8f16(<8 x half>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <8 x half>
  store <8 x half> %1, <8 x half>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v8f16:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: st.h [[R1]],
; LITENDIAN: .size v8f16_to_v8f16

; BIGENDIAN: v8f16_to_v8f16:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: st.h [[R1]],
; BIGENDIAN: .size v8f16_to_v8f16

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v4i32(<8 x half>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <4 x i32>
  %2 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %1, <4 x i32> %1)
  store <4 x i32> %2, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v4i32:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v8f16_to_v4i32

; BIGENDIAN: v8f16_to_v4i32:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v8f16_to_v4i32

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v4f32(<8 x half>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <4 x float>
  %2 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %1, <4 x float> %1)
  store <4 x float> %2, <4 x float>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v4f32:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v8f16_to_v4f32

; BIGENDIAN: v8f16_to_v4f32:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v8f16_to_v4f32

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v2i64(<8 x half>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <2 x i64>
  %2 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %1, <2 x i64> %1)
  store <2 x i64> %2, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v2i64:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v8f16_to_v2i64

; BIGENDIAN: v8f16_to_v2i64:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v8f16_to_v2i64

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v2f64(<8 x half>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <2 x double>
  %2 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %1, <2 x double> %1)
  store <2 x double> %2, <2 x double>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v2f64:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v8f16_to_v2f64

; BIGENDIAN: v8f16_to_v2f64:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v8f16_to_v2f64
;----

define void @v4i32_to_v16i8(<4 x i32>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>, <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v16i8:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v4i32_to_v16i8

; BIGENDIAN: v4i32_to_v16i8:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v4i32_to_v16i8

define void @v4i32_to_v8i16(<4 x i32>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>, <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v8i16:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v4i32_to_v8i16

; BIGENDIAN: v4i32_to_v8i16:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v4i32_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v4i32_to_v8f16(<4 x i32>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>, <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v8f16:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v4i32_to_v8f16

; BIGENDIAN: v4i32_to_v8f16:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.w [[R2]],
; BIGENDIAN: .size v4i32_to_v8f16

define void @v4i32_to_v4i32(<4 x i32>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>, <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v4i32:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v4i32_to_v4i32

; BIGENDIAN: v4i32_to_v4i32:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v4i32_to_v4i32

define void @v4i32_to_v4f32(<4 x i32>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>, <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v4f32:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v4i32_to_v4f32

; BIGENDIAN: v4i32_to_v4f32:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v4i32_to_v4f32

define void @v4i32_to_v2i64(<4 x i32>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>, <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v2i64:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v4i32_to_v2i64

; BIGENDIAN: v4i32_to_v2i64:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v4i32_to_v2i64

define void @v4i32_to_v2f64(<4 x i32>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>, <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v2f64:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v4i32_to_v2f64

; BIGENDIAN: v4i32_to_v2f64:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v4i32_to_v2f64

define void @v4f32_to_v16i8(<4 x float>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>, <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v16i8:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v4f32_to_v16i8

; BIGENDIAN: v4f32_to_v16i8:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v4f32_to_v16i8

define void @v4f32_to_v8i16(<4 x float>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>, <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v8i16:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v4f32_to_v8i16

; BIGENDIAN: v4f32_to_v8i16:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v4f32_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v4f32_to_v8f16(<4 x float>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>, <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v8f16:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v4f32_to_v8f16

; BIGENDIAN: v4f32_to_v8f16:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.w [[R2]],
; BIGENDIAN: .size v4f32_to_v8f16

define void @v4f32_to_v4i32(<4 x float>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>, <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v4i32:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v4f32_to_v4i32

; BIGENDIAN: v4f32_to_v4i32:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v4f32_to_v4i32

define void @v4f32_to_v4f32(<4 x float>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>, <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v4f32:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v4f32_to_v4f32

; BIGENDIAN: v4f32_to_v4f32:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v4f32_to_v4f32

define void @v4f32_to_v2i64(<4 x float>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>, <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v2i64:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v4f32_to_v2i64

; BIGENDIAN: v4f32_to_v2i64:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v4f32_to_v2i64

define void @v4f32_to_v2f64(<4 x float>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>, <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v2f64:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v4f32_to_v2f64

; BIGENDIAN: v4f32_to_v2f64:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v4f32_to_v2f64

define void @v2i64_to_v16i8(<2 x i64>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>, <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v16i8:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v2i64_to_v16i8

; BIGENDIAN: v2i64_to_v16i8:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v2i64_to_v16i8

define void @v2i64_to_v8i16(<2 x i64>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>, <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v8i16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v2i64_to_v8i16

; BIGENDIAN: v2i64_to_v8i16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v2i64_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v2i64_to_v8f16(<2 x i64>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>, <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v8f16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v2i64_to_v8f16

; BIGENDIAN: v2i64_to_v8f16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.d [[R2]],
; BIGENDIAN: .size v2i64_to_v8f16

define void @v2i64_to_v4i32(<2 x i64>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>, <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v4i32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2i64_to_v4i32

; BIGENDIAN: v2i64_to_v4i32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2i64_to_v4i32

define void @v2i64_to_v4f32(<2 x i64>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>, <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v4f32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2i64_to_v4f32

; BIGENDIAN: v2i64_to_v4f32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2i64_to_v4f32

define void @v2i64_to_v2i64(<2 x i64>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>, <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v2i64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2i64_to_v2i64

; BIGENDIAN: v2i64_to_v2i64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2i64_to_v2i64

define void @v2i64_to_v2f64(<2 x i64>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>, <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v2f64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2i64_to_v2f64

; BIGENDIAN: v2i64_to_v2f64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2i64_to_v2f64

define void @v2f64_to_v16i8(<2 x double>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>, <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v16i8:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v2f64_to_v16i8

; BIGENDIAN: v2f64_to_v16i8:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v2f64_to_v16i8

define void @v2f64_to_v8i16(<2 x double>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>, <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v8i16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v2f64_to_v8i16

; BIGENDIAN: v2f64_to_v8i16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v2f64_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v2f64_to_v8f16(<2 x double>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>, <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v8f16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v2f64_to_v8f16

; BIGENDIAN: v2f64_to_v8f16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.d [[R2]],
; BIGENDIAN: .size v2f64_to_v8f16

define void @v2f64_to_v4i32(<2 x double>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>, <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v4i32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2f64_to_v4i32

; BIGENDIAN: v2f64_to_v4i32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2f64_to_v4i32

define void @v2f64_to_v4f32(<2 x double>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>, <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v4f32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2f64_to_v4f32

; BIGENDIAN: v2f64_to_v4f32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2f64_to_v4f32

define void @v2f64_to_v2i64(<2 x double>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>, <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v2i64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2f64_to_v2i64

; BIGENDIAN: v2f64_to_v2i64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2f64_to_v2i64

define void @v2f64_to_v2f64(<2 x double>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>, <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v2f64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2f64_to_v2f64

; BIGENDIAN: v2f64_to_v2f64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2f64_to_v2f64

declare <16 x i8> @llvm.mips.addv.b(<16 x i8>, <16 x i8>) nounwind
declare <8 x i16> @llvm.mips.addv.h(<8 x i16>, <8 x i16>) nounwind
declare <4 x i32> @llvm.mips.addv.w(<4 x i32>, <4 x i32>) nounwind
declare <2 x i64> @llvm.mips.addv.d(<2 x i64>, <2 x i64>) nounwind
declare <4 x float> @llvm.mips.fadd.w(<4 x float>, <4 x float>) nounwind
declare <2 x double> @llvm.mips.fadd.d(<2 x double>, <2 x double>) nounwind