; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
; rdar://13082402

; Same-width integer -> FP conversions of a loaded value: the expected code
; loads straight into an FP register and converts in place.
define float @t1(i32* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t1:
; CHECK: ldr s0, [x0]
; CHECK: scvtf s0, s0
  %ld = load i32* %src, align 4
  %cvt = sitofp i32 %ld to float
  ret float %cvt
}

define float @t2(i32* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t2:
; CHECK: ldr s0, [x0]
; CHECK: ucvtf s0, s0
  %ld = load i32* %src, align 4
  %cvt = uitofp i32 %ld to float
  ret float %cvt
}

define double @t3(i64* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t3:
; CHECK: ldr d0, [x0]
; CHECK: scvtf d0, d0
  %ld = load i64* %src, align 4
  %cvt = sitofp i64 %ld to double
  ret double %cvt
}

define double @t4(i64* nocapture %src) nounwind ssp {
entry:
; CHECK-LABEL: t4:
; CHECK: ldr d0, [x0]
; CHECK: ucvtf d0, d0
  %ld = load i64* %src, align 4
  %cvt = uitofp i64 %ld to double
  ret double %cvt
}

; rdar://13136456
; With optsize, the i32 is expected to be loaded into a GPR and converted
; from there.
define double @t5(i32* nocapture %src) nounwind ssp optsize {
entry:
; CHECK-LABEL: t5:
; CHECK: ldr [[REG:w[0-9]+]], [x0]
; CHECK: scvtf d0, [[REG]]
  %ld = load i32* %src, align 4
  %cvt = sitofp i32 %ld to double
  ret double %cvt
}

; Verify that a value about to be converted to floating point is loaded
; directly into an FP register.
; This is much faster than loading into a GPR and then doing the
; GPR -> FPR conversion.
; <rdar://problem/14599607>
;
; The following patterns are covered for signed/unsigned:
; 1. load with scaled imm to float.
; 2. load with scaled register to float.
; 3. load with scaled imm to double.
; 4. load with scaled register to double.
; 5. load with unscaled imm to float.
; 6. load with unscaled imm to double.
; With loading size: 8, 16, 32, and 64-bits.

; ********* 1. load with scaled imm to float. *********
define float @fct1(i8* nocapture %sp0) {
; CHECK-LABEL: fct1:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %p = getelementptr i8* %sp0, i64 1
  %ld = load i8* %p, align 1
  %cvt = uitofp i8 %ld to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

define float @fct2(i16* nocapture %sp0) {
; CHECK-LABEL: fct2:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %p = getelementptr i16* %sp0, i64 1
  %ld = load i16* %p, align 1
  %cvt = uitofp i16 %ld to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

define float @fct3(i32* nocapture %sp0) {
; CHECK-LABEL: fct3:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %p = getelementptr i32* %sp0, i64 1
  %ld = load i32* %p, align 1
  %cvt = uitofp i32 %ld to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

; i64 -> f32 is not supported on floating point unit.
define float @fct4(i64* nocapture %sp0) {
; CHECK-LABEL: fct4:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %p = getelementptr i64* %sp0, i64 1
  %ld = load i64* %p, align 1
  %cvt = uitofp i64 %ld to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

; ********* 2. load with scaled register to float. *********
define float @fct5(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct5:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %p = getelementptr i8* %sp0, i64 %offset
  %ld = load i8* %p, align 1
  %cvt = uitofp i8 %ld to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

define float @fct6(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct6:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %p = getelementptr i16* %sp0, i64 %offset
  %ld = load i16* %p, align 1
  %cvt = uitofp i16 %ld to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

define float @fct7(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct7:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %p = getelementptr i32* %sp0, i64 %offset
  %ld = load i32* %p, align 1
  %cvt = uitofp i32 %ld to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

; i64 -> f32 is not supported on floating point unit.
define float @fct8(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct8:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %p = getelementptr i64* %sp0, i64 %offset
  %ld = load i64* %p, align 1
  %cvt = uitofp i64 %ld to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}


; ********* 3. load with scaled imm to double. *********
define double @fct9(i8* nocapture %sp0) {
; CHECK-LABEL: fct9:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %p = getelementptr i8* %sp0, i64 1
  %ld = load i8* %p, align 1
  %cvt = uitofp i8 %ld to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

define double @fct10(i16* nocapture %sp0) {
; CHECK-LABEL: fct10:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %p = getelementptr i16* %sp0, i64 1
  %ld = load i16* %p, align 1
  %cvt = uitofp i16 %ld to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

define double @fct11(i32* nocapture %sp0) {
; CHECK-LABEL: fct11:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %p = getelementptr i32* %sp0, i64 1
  %ld = load i32* %p, align 1
  %cvt = uitofp i32 %ld to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

define double @fct12(i64* nocapture %sp0) {
; CHECK-LABEL: fct12:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %p = getelementptr i64* %sp0, i64 1
  %ld = load i64* %p, align 1
  %cvt = uitofp i64 %ld to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

; ********* 4. load with scaled register to double. *********
define double @fct13(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct13:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %p = getelementptr i8* %sp0, i64 %offset
  %ld = load i8* %p, align 1
  %cvt = uitofp i8 %ld to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

define double @fct14(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct14:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %p = getelementptr i16* %sp0, i64 %offset
  %ld = load i16* %p, align 1
  %cvt = uitofp i16 %ld to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

define double @fct15(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct15:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %p = getelementptr i32* %sp0, i64 %offset
  %ld = load i32* %p, align 1
  %cvt = uitofp i32 %ld to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

define double @fct16(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct16:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %p = getelementptr i64* %sp0, i64 %offset
  %ld = load i64* %p, align 1
  %cvt = uitofp i64 %ld to double
  %sq = fmul double %cvt, %cvt
  ret double %sq
}

; ********* 5. load with unscaled imm to float. *********
define float @fct17(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: fct17:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %base = ptrtoint i8* %sp0 to i64
  %off = add i64 %base, -1
  %p = inttoptr i64 %off to i8*
  %ld = load i8* %p, align 1
  %cvt = uitofp i8 %ld to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

define float @fct18(i16* nocapture %sp0) {
; CHECK-LABEL: fct18:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %base = ptrtoint i16* %sp0 to i64
  %off = add i64 %base, 1
  %p = inttoptr i64 %off to i16*
  %ld = load i16* %p, align 1
  %cvt = uitofp i16 %ld to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

define float @fct19(i32* nocapture %sp0) {
; CHECK-LABEL: fct19:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %base = ptrtoint i32* %sp0 to i64
  %off = add i64 %base, 1
  %p = inttoptr i64 %off to i32*
  %ld = load i32* %p, align 1
  %cvt = uitofp i32 %ld to float
  %sq = fmul float %cvt, %cvt
  ret float %sq
}

; i64 -> f32 is not supported on floating point unit.
; Unsigned i64 -> float from an unscaled (ldur) address: the value is expected
; to be loaded into a GPR and converted from the x register.
define float @fct20(i64* nocapture %sp0) {
; CHECK-LABEL: fct20:
; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i

}

; ********* 6. load with unscaled imm to double. *********
; Each function forms the address with ptrtoint/add/inttoptr, so the offset is
; a raw byte offset; the expected load is an ldur into an FP register.
define double @fct21(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: fct21:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = uitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @fct22(i16* nocapture %sp0) {
; CHECK-LABEL: fct22:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i16* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i16*
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = uitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @fct23(i32* nocapture %sp0) {
; CHECK-LABEL: fct23:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i32* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i32*
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = uitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @fct24(i64* nocapture %sp0) {
; CHECK-LABEL: fct24:
; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i

}

; ********* 1s. load with scaled imm to float. *********
; Signed variants. For sources narrower than the destination the expected code
; loads into an FP register and widens with sshll before scvtf; the widened
; register number is captured as SEXTREG.
define float @sfct1(i8* nocapture %sp0) {
; CHECK-LABEL: sfct1:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

define float @sfct2(i16* nocapture %sp0) {
; CHECK-LABEL: sfct2:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i32 -> float needs no widening, so scvtf reads the register written by the
; load; it is matched against REGNUM captured in this function rather than a
; variable left over from an earlier function.
define float @sfct3(i32* nocapture %sp0) {
; CHECK-LABEL: sfct3:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i64 -> f32 is not supported on floating point unit.
define float @sfct4(i64* nocapture %sp0) {
; CHECK-LABEL: sfct4:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; ********* 2s. load with scaled register to float. *********
define float @sfct5(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct5:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

define float @sfct6(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct6:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; No widening step here: the conversion source is the loaded register
; (REGNUM), captured in this function.
define float @sfct7(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct7:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i64 -> f32 is not supported on floating point unit.
define float @sfct8(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct8:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; ********* 3s. load with scaled imm to double. *********
; For i8 -> double the expected code is a sign-extending GPR load (ldrsb)
; followed by a conversion from the w register, not the sshll sequence.
define double @sfct9(i8* nocapture %sp0) {
; CHECK-LABEL: sfct9:
; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct10(i16* nocapture %sp0) {
; CHECK-LABEL: sfct10:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct11(i32* nocapture %sp0) {
; CHECK-LABEL: sfct11:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; i64 -> double needs no widening, so scvtf is matched against the register
; captured by the load in this function (REGNUM).
define double @sfct12(i64* nocapture %sp0) {
; CHECK-LABEL: sfct12:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; ********* 4s. load with scaled register to double. *********
define double @sfct13(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct13:
; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct14(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct14:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct15(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct15:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; No widening step here: the conversion source is the loaded register
; (REGNUM), captured in this function.
define double @sfct16(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct16:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; ********* 5s. load with unscaled imm to float. *********
define float @sfct17(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct17:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

define float @sfct18(i16* nocapture %sp0) {
; CHECK-LABEL: sfct18:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i16* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i16*
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; No widening step here: the conversion source is the loaded register
; (REGNUM), captured in this function.
define float @sfct19(i32* nocapture %sp0) {
; CHECK-LABEL: sfct19:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i32* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i32*
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i64 -> f32 is not supported on floating point unit.
; Signed i64 -> float from an unscaled (ldur) address: the value is expected
; to be loaded into a GPR and converted from the x register.
define float @sfct20(i64* nocapture %sp0) {
; CHECK-LABEL: sfct20:
; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i

}

; ********* 6s. load with unscaled imm to double. *********
; For i8 the expected code is a sign-extending GPR load (ldursb) converted
; from the w register; i16/i32 are loaded into an FP register and widened with
; sshll, the widened register number being captured as SEXTREG.
define double @sfct21(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct21:
; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct22(i16* nocapture %sp0) {
; CHECK-LABEL: sfct22:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i16* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i16*
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

define double @sfct23(i32* nocapture %sp0) {
; CHECK-LABEL: sfct23:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i32* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i32*
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; i64 -> double needs no widening, so scvtf is matched against the register
; captured by the load in this function (REGNUM) rather than a variable left
; over from an earlier function.
define double @sfct24(i64* nocapture %sp0) {
; CHECK-LABEL: sfct24:
; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i

}

; Check that we do not use SSHLL code sequence when code size is a concern.
define float @codesize_sfct17(i8* nocapture %sp0) optsize {
entry:
; CHECK-LABEL: codesize_sfct17:
; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; Same as above with minsize: a plain GPR load followed by scvtf from the w
; register is expected. The label names this function in full so it is not
; matched merely as a substring.
define double @codesize_sfct11(i32* nocapture %sp0) minsize {
; CHECK-LABEL: codesize_sfct11:
; CHECK: ldr w[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Adding fp128 custom lowering makes these a little fragile since we have to
; return the correct mix of Legal/Expand from the custom method.
;
; rdar://problem/14991489
;
; Each conversion involving i128 is expected to become a runtime library call;
; the optional leading underscore in the patterns allows for targets that
; prefix symbol names.

define float @float_from_i128(i128 %in) {
; CHECK-LABEL: float_from_i128:
; CHECK: bl {{_?__floatuntisf}}
  %conv = uitofp i128 %in to float
  ret float %conv
}

define double @double_from_i128(i128 %in) {
; CHECK-LABEL: double_from_i128:
; CHECK: bl {{_?__floattidf}}
  %conv = sitofp i128 %in to double
  ret double %conv
}

define fp128 @fp128_from_i128(i128 %in) {
; CHECK-LABEL: fp128_from_i128:
; CHECK: bl {{_?__floatuntitf}}
  %conv = uitofp i128 %in to fp128
  ret fp128 %conv
}

define i128 @i128_from_float(float %in) {
; CHECK-LABEL: i128_from_float:
; CHECK: bl {{_?__fixsfti}}
  %conv = fptosi float %in to i128
  ret i128 %conv
}

define i128 @i128_from_double(double %in) {
; CHECK-LABEL: i128_from_double:
; CHECK: bl {{_?__fixunsdfti}}
  %conv = fptoui double %in to i128
  ret i128 %conv
}

define i128 @i128_from_fp128(fp128 %in) {
; CHECK-LABEL: i128_from_fp128:
; CHECK: bl {{_?__fixtfti}}
  %conv = fptosi fp128 %in to i128
  ret i128 %conv
}