; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s

; Code generation tests for MSA floating-point vector arithmetic on
; <4 x float> (.w instructions) and <2 x double> (.d instructions), run for
; both the mips and mipsel targets.
;
; Every test loads its operands through pointer arguments, applies one
; operation and stores the result through the first pointer argument. The
; checks expect the pointer arguments in $4, $5, $6, $7 (in declaration order)
; and the vector values in $w registers.

; fadd on <4 x float> is expected to select fadd.w.
define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: add_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <4 x float> %1, %2
  ; CHECK-DAG: fadd.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v4f32
}

; fadd on <2 x double> is expected to select fadd.d.
define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: add_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <2 x double> %1, %2
  ; CHECK-DAG: fadd.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v2f64
}

; fsub on <4 x float> is expected to select fsub.w.
define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: sub_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <4 x float> %1, %2
  ; CHECK-DAG: fsub.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v4f32
}

; fsub on <2 x double> is expected to select fsub.d.
define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: sub_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <2 x double> %1, %2
  ; CHECK-DAG: fsub.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v2f64
}

; fmul on <4 x float> is expected to select fmul.w.
define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: mul_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <4 x float> %1, %2
  ; CHECK-DAG: fmul.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v4f32
}

; fmul on <2 x double> is expected to select fmul.d.
define void @mul_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: mul_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <2 x double> %1, %2
  ; CHECK-DAG: fmul.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v2f64
}

; llvm.fma on <4 x float> is expected to select fmadd.w. The instruction
; accumulates into its first operand, so the stored register is the one that
; received the load of %a.
define void @fma_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                       <4 x float>* %c) nounwind {
  ; CHECK: fma_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = tail call <4 x float> @llvm.fma.v4f32 (<4 x float> %1, <4 x float> %2,
                                              <4 x float> %3)
  ; CHECK-DAG: fmadd.w [[R1]], [[R2]], [[R3]]
  store <4 x float> %4, <4 x float>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v4f32
}

; llvm.fma on <2 x double> is expected to select fmadd.d, accumulating into
; the register that received the load of %a.
define void @fma_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                       <2 x double>* %c) nounwind {
  ; CHECK: fma_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = tail call <2 x double> @llvm.fma.v2f64 (<2 x double> %1, <2 x double> %2,
                                               <2 x double> %3)
  ; CHECK-DAG: fmadd.d [[R1]], [[R2]], [[R3]]
  store <2 x double> %4, <2 x double>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v2f64
}

; The separate fmul/fsub pair computing %a - (%b * %c) on <4 x float> is
; expected to be combined into a single fmsub.w that updates the register
; holding %a.
define void @fmsub_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                         <4 x float>* %c) nounwind {
  ; CHECK: fmsub_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = fmul <4 x float> %2, %3
  %5 = fsub <4 x float> %1, %4
  ; CHECK-DAG: fmsub.w [[R1]], [[R2]], [[R3]]
  store <4 x float> %5, <4 x float>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size fmsub_v4f32
}

; The separate fmul/fsub pair computing %a - (%b * %c) on <2 x double> is
; expected to be combined into a single fmsub.d that updates the register
; holding %a.
define void @fmsub_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                         <2 x double>* %c) nounwind {
  ; CHECK: fmsub_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = fmul <2 x double> %2, %3
  %5 = fsub <2 x double> %1, %4
  ; CHECK-DAG: fmsub.d [[R1]], [[R2]], [[R3]]
  store <2 x double> %5, <2 x double>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size fmsub_v2f64
}

; fdiv on <4 x float> is expected to select fdiv.w.
define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: fdiv_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <4 x float> %1, %2
  ; CHECK-DAG: fdiv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v4f32
}

; fdiv on <2 x double> is expected to select fdiv.d.
define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: fdiv_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <2 x double> %1, %2
  ; CHECK-DAG: fdiv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v2f64
}

; llvm.fabs on <4 x float> is expected to select fmax_a.w with the same
; register used for both source operands.
define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fabs_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.fabs.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fmax_a.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v4f32
}

; llvm.fabs on <2 x double> is expected to select fmax_a.d with the same
; register used for both source operands.
define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fabs_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.fabs.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fmax_a.d [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v2f64
}

; llvm.exp2 on <4 x float> with no surrounding multiply is expected to become
; fexp2.w with a multiplicand of 1.0, built as an integer splat of 1 (ldi.w)
; converted to floating point (ffint_u.w).
define void @fexp2_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  ; R3 is the integer splat, R4 its floating-point conversion and R5 the
  ; result. Distinct names make the checks verify that fexp2.w consumes the
  ; converted value rather than any register that happens to be reused.
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32
}

; llvm.exp2 on <2 x double> with no surrounding multiply is expected to become
; fexp2.d with a multiplicand of 1.0, built as an integer splat of 1 (ldi.d)
; converted to floating point (ffint_u.d).
define void @fexp2_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fexp2_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  ; R3 is the integer splat, R4 its floating-point conversion and R5 the
  ; result. Distinct names make the checks verify that fexp2.d consumes the
  ; converted value rather than any register that happens to be reused.
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.d [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.d [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64
}

; 2.0 * exp2(x) on <4 x float> is expected to fold into one fexp2.w whose
; multiplicand is a splat of 2.0: lui 16384 materialises 0x40000000 (the bit
; pattern of 2.0f) and fill.w broadcasts it.
define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32_2:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  %3 = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %2
  ; CHECK-DAG: lui [[R3:\$[0-9]+]], 16384
  ; CHECK-DAG: fill.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32_2
}

; 2.0 * exp2(x) on <2 x double> is expected to fold into one fexp2.d whose
; multiplicand is loaded from a constant emitted ahead of the function label;
; 4611686018427387904 is 0x4000000000000000, the bit pattern of 2.0 as a
; double.
define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK:      .8byte 4611686018427387904
  ; CHECK-NEXT: .8byte 4611686018427387904
  ; CHECK: fexp2_v2f64_2:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  %3 = fmul <2 x double> <double 2.0, double 2.0>, %2
  ; CHECK-DAG: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[G_PTR]])
  ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64_2
}

; llvm.sqrt on <4 x float> is expected to select fsqrt.w.
define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fsqrt_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fsqrt.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v4f32
}

; llvm.sqrt on <2 x double> is expected to select fsqrt.d.
define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fsqrt_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fsqrt.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v2f64
}

; uitofp <4 x i32> to <4 x float> is expected to select ffint_u.w.
define void @ffint_u_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_u_v4f32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = uitofp <4 x i32> %1 to <4 x float>
  ; CHECK-DAG: ffint_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v4f32
}

; uitofp <2 x i64> to <2 x double> is expected to select ffint_u.d.
define void @ffint_u_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_u_v2f64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = uitofp <2 x i64> %1 to <2 x double>
  ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v2f64
}

; sitofp <4 x i32> to <4 x float> is expected to select ffint_s.w.
define void @ffint_s_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_s_v4f32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = sitofp <4 x i32> %1 to <4 x float>
  ; CHECK-DAG: ffint_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v4f32
}

; sitofp <2 x i64> to <2 x double> is expected to select ffint_s.d.
define void @ffint_s_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_s_v2f64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = sitofp <2 x i64> %1 to <2 x double>
  ; CHECK-DAG: ffint_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v2f64
}

; fptoui <4 x float> to <4 x i32> is expected to select ftrunc_u.w.
define void @ftrunc_u_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_u_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = fptoui <4 x float> %1 to <4 x i32>
  ; CHECK-DAG: ftrunc_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v4f32
}

; fptoui <2 x double> to <2 x i64> is expected to select ftrunc_u.d.
define void @ftrunc_u_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_u_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = fptoui <2 x double> %1 to <2 x i64>
  ; CHECK-DAG: ftrunc_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v2f64
}

; fptosi <4 x float> to <4 x i32> is expected to select ftrunc_s.w.
define void @ftrunc_s_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_s_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = fptosi <4 x float> %1 to <4 x i32>
  ; CHECK-DAG: ftrunc_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v4f32
}

; fptosi <2 x double> to <2 x i64> is expected to select ftrunc_s.d.
define void @ftrunc_s_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_s_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = fptosi <2 x double> %1 to <2 x i64>
  ; CHECK-DAG: ftrunc_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v2f64
}

; Declarations of the vector intrinsics called by the tests in this file.
declare <4 x float>  @llvm.fabs.v4f32(<4 x float>  %Val)
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %Val)
declare <4 x float>  @llvm.exp2.v4f32(<4 x float>  %val)
declare <2 x double> @llvm.exp2.v2f64(<2 x double> %val)
declare <4 x float>  @llvm.fma.v4f32(<4 x float>  %a, <4 x float>  %b,
                                     <4 x float>  %c)
declare <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b,
                                     <2 x double> %c)
declare <4 x float>  @llvm.sqrt.v4f32(<4 x float>  %Val)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %Val)