; RUN: llc < %s -march=arm64 -mcpu=cyclone -aarch64-neon-syntax=apple | FileCheck %s
; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 | FileCheck --check-prefix=CHECK-A57 %s
; rdar://13082402

; Signed i32 -> float. The patterns expect the load to land in s0 directly.
define float @t1(i32* nocapture %src) nounwind ssp {
; CHECK-LABEL: t1:
; CHECK: ldr s0, [x0]
; CHECK: scvtf s0, s0
entry:
  %ival = load i32, i32* %src, align 4
  %fval = sitofp i32 %ival to float
  ret float %fval
}

; Unsigned i32 -> float. Same shape as t1, with ucvtf.
define float @t2(i32* nocapture %src) nounwind ssp {
; CHECK-LABEL: t2:
; CHECK: ldr s0, [x0]
; CHECK: ucvtf s0, s0
entry:
  %ival = load i32, i32* %src, align 4
  %fval = uitofp i32 %ival to float
  ret float %fval
}

; Signed i64 -> double. The patterns expect the load to land in d0 directly.
define double @t3(i64* nocapture %src) nounwind ssp {
; CHECK-LABEL: t3:
; CHECK: ldr d0, [x0]
; CHECK: scvtf d0, d0
entry:
  %ival = load i64, i64* %src, align 4
  %fval = sitofp i64 %ival to double
  ret double %fval
}

; Unsigned i64 -> double. Same shape as t3, with ucvtf.
define double @t4(i64* nocapture %src) nounwind ssp {
; CHECK-LABEL: t4:
; CHECK: ldr d0, [x0]
; CHECK: ucvtf d0, d0
entry:
  %ival = load i64, i64* %src, align 4
  %fval = uitofp i64 %ival to double
  ret double %fval
}

; rdar://13136456
; Signed i32 -> double in an optsize function. Here the patterns expect a
; GPR (w-register) load feeding scvtf.
define double @t5(i32* nocapture %src) nounwind ssp optsize {
; CHECK-LABEL: t5:
; CHECK: ldr [[REG:w[0-9]+]], [x0]
; CHECK: scvtf d0, [[REG]]
entry:
  %ival = load i32, i32* %src, align 4
  %fval = sitofp i32 %ival to double
  ret double %fval
}

; Check that we load into an FP register when we want to convert into a
; floating point value.
; This is much faster than loading into a GPR and making the conversion
; GPR -> FPR.
; <rdar://problem/14599607>
;
; Check the following patterns for signed/unsigned:
; 1. load with scaled imm to float.
; 2. load with scaled register to float.
; 3. load with scaled imm to double.
; 4. load with scaled register to double.
; 5. load with unscaled imm to float.
; 6. load with unscaled imm to double.
; Each pattern is exercised with 8-, 16-, 32-, and 64-bit loads.

; ********* 1. load with scaled imm to float. *********
; Unsigned i8 at element index 1 -> float, then multiplied by itself.
define float @fct1(i8* nocapture %sp0) {
; CHECK-LABEL: fct1:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8, i8* %sp0, i64 1
  %elt = load i8, i8* %ptr, align 1
  %conv = uitofp i8 %elt to float
  %sq = fmul float %conv, %conv
  ret float %sq
}

; Unsigned i16 at element index 1 -> float, then multiplied by itself.
define float @fct2(i16* nocapture %sp0) {
; CHECK-LABEL: fct2:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16, i16* %sp0, i64 1
  %elt = load i16, i16* %ptr, align 1
  %conv = uitofp i16 %elt to float
  %sq = fmul float %conv, %conv
  ret float %sq
}

; Unsigned i32 at element index 1 -> float, then multiplied by itself.
define float @fct3(i32* nocapture %sp0) {
; CHECK-LABEL: fct3:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32, i32* %sp0, i64 1
  %elt = load i32, i32* %ptr, align 1
  %conv = uitofp i32 %elt to float
  %sq = fmul float %conv, %conv
  ret float %sq
}

; i64 -> f32 is not supported on floating point unit.
; The patterns therefore expect an x-register load feeding ucvtf.
define float @fct4(i64* nocapture %sp0) {
; CHECK-LABEL: fct4:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64, i64* %sp0, i64 1
  %elt = load i64, i64* %ptr, align 1
  %conv = uitofp i64 %elt to float
  %sq = fmul float %conv, %conv
  ret float %sq
}

; ********* 2. load with scaled register to float. *********
; Unsigned i8 at element index %offset -> float, then multiplied by itself.
define float @fct5(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct5:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8, i8* %sp0, i64 %offset
  %elt = load i8, i8* %ptr, align 1
  %conv = uitofp i8 %elt to float
  %sq = fmul float %conv, %conv
  ret float %sq
}

; Unsigned i16 at element index %offset -> float, then multiplied by itself.
define float @fct6(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct6:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16, i16* %sp0, i64 %offset
  %elt = load i16, i16* %ptr, align 1
  %conv = uitofp i16 %elt to float
  %sq = fmul float %conv, %conv
  ret float %sq
}

; Unsigned i32 at element index %offset -> float, then multiplied by itself.
define float @fct7(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct7:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32, i32* %sp0, i64 %offset
  %elt = load i32, i32* %ptr, align 1
  %conv = uitofp i32 %elt to float
  %sq = fmul float %conv, %conv
  ret float %sq
}

; i64 -> f32 is not supported on floating point unit.
; The patterns therefore expect an x-register load feeding ucvtf.
define float @fct8(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct8:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64, i64* %sp0, i64 %offset
  %elt = load i64, i64* %ptr, align 1
  %conv = uitofp i64 %elt to float
  %sq = fmul float %conv, %conv
  ret float %sq
}


; ********* 3. load with scaled imm to double. *********
; Unsigned i8 at element index 1 -> double, then multiplied by itself.
define double @fct9(i8* nocapture %sp0) {
; CHECK-LABEL: fct9:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8, i8* %sp0, i64 1
  %elt = load i8, i8* %ptr, align 1
  %conv = uitofp i8 %elt to double
  %sq = fmul double %conv, %conv
  ret double %sq
}

; Unsigned i16 at element index 1 -> double, then multiplied by itself.
define double @fct10(i16* nocapture %sp0) {
; CHECK-LABEL: fct10:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16, i16* %sp0, i64 1
  %elt = load i16, i16* %ptr, align 1
  %conv = uitofp i16 %elt to double
  %sq = fmul double %conv, %conv
  ret double %sq
}

; Unsigned i32 at element index 1 -> double, then multiplied by itself.
define double @fct11(i32* nocapture %sp0) {
; CHECK-LABEL: fct11:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32, i32* %sp0, i64 1
  %elt = load i32, i32* %ptr, align 1
  %conv = uitofp i32 %elt to double
  %sq = fmul double %conv, %conv
  ret double %sq
}

; Unsigned i64 at element index 1 -> double, then multiplied by itself.
define double @fct12(i64* nocapture %sp0) {
; CHECK-LABEL: fct12:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64, i64* %sp0, i64 1
  %elt = load i64, i64* %ptr, align 1
  %conv = uitofp i64 %elt to double
  %sq = fmul double %conv, %conv
  ret double %sq
}

; ********* 4. load with scaled register to double. *********
; Unsigned i8 at element index %offset -> double, then multiplied by itself.
define double @fct13(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct13:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i8, i8* %sp0, i64 %offset
  %elt = load i8, i8* %ptr, align 1
  %conv = uitofp i8 %elt to double
  %sq = fmul double %conv, %conv
  ret double %sq
}

; Unsigned i16 at element index %offset -> double, then multiplied by itself.
define double @fct14(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct14:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i16, i16* %sp0, i64 %offset
  %elt = load i16, i16* %ptr, align 1
  %conv = uitofp i16 %elt to double
  %sq = fmul double %conv, %conv
  ret double %sq
}

; Unsigned i32 at element index %offset -> double, then multiplied by itself.
define double @fct15(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct15:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i32, i32* %sp0, i64 %offset
  %elt = load i32, i32* %ptr, align 1
  %conv = uitofp i32 %elt to double
  %sq = fmul double %conv, %conv
  ret double %sq
}

; Unsigned i64 at element index %offset -> double, then multiplied by itself.
define double @fct16(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct16:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %ptr = getelementptr i64, i64* %sp0, i64 %offset
  %elt = load i64, i64* %ptr, align 1
  %conv = uitofp i64 %elt to double
  %sq = fmul double %conv, %conv
  ret double %sq
}

; ********* 5. load with unscaled imm to float. *********
; Unsigned i8 at integer address %sp0 - 1 -> float, then multiplied by itself.
; The address is built with ptrtoint/add/inttoptr rather than getelementptr.
define float @fct17(i8* nocapture %sp0) {
; CHECK-LABEL: fct17:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i8* %sp0 to i64
  %raw = add i64 %base, -1
  %ptr = inttoptr i64 %raw to i8*
  %elt = load i8, i8* %ptr, align 1
  %conv = uitofp i8 %elt to float
  %sq = fmul float %conv, %conv
  ret float %sq
}

; Unsigned i16 at integer address %sp0 + 1 -> float, then multiplied by itself.
define float @fct18(i16* nocapture %sp0) {
; CHECK-LABEL: fct18:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i16* %sp0 to i64
  %raw = add i64 %base, 1
  %ptr = inttoptr i64 %raw to i16*
  %elt = load i16, i16* %ptr, align 1
  %conv = uitofp i16 %elt to float
  %sq = fmul float %conv, %conv
  ret float %sq
}

; Unsigned i32 at integer address %sp0 + 1 -> float, then multiplied by itself.
define float @fct19(i32* nocapture %sp0) {
; CHECK-LABEL: fct19:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i32* %sp0 to i64
  %raw = add i64 %base, 1
  %ptr = inttoptr i64 %raw to i32*
  %elt = load i32, i32* %ptr, align 1
  %conv = uitofp i32 %elt to float
  %sq = fmul float %conv, %conv
  ret float %sq
}

; i64 -> f32 is not supported on floating point unit.
; Unsigned i64 at integer address %sp0 + 1 -> float. The patterns expect an
; x-register load feeding ucvtf.
define float @fct20(i64* nocapture %sp0) {
; CHECK-LABEL: fct20:
; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; ********* 6. load with unscaled imm to double. *********
; Unsigned i8 at integer address %sp0 - 1 -> double.
define double @fct21(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: fct21:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = uitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Unsigned i16 at integer address %sp0 + 1 -> double.
define double @fct22(i16* nocapture %sp0) {
; CHECK-LABEL: fct22:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i16* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i16*
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %val = uitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Unsigned i32 at integer address %sp0 + 1 -> double.
define double @fct23(i32* nocapture %sp0) {
; CHECK-LABEL: fct23:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i32* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i32*
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = uitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Unsigned i64 at integer address %sp0 + 1 -> double.
define double @fct24(i64* nocapture %sp0) {
; CHECK-LABEL: fct24:
; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; ********* 1s. load with scaled imm to float. *********
; Signed i8 at element index 1 -> float. The cyclone patterns expect an
; FP-register load widened by two sshll steps; the cortex-a57 patterns expect
; a sign-extending GPR load (ldrsb).
define float @sfct1(i8* nocapture %sp0) {
; CHECK-LABEL: sfct1:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct1:
; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; Signed i16 at element index 1 -> float. One sshll step is expected.
define float @sfct2(i16* nocapture %sp0) {
; CHECK-LABEL: sfct2:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; Signed i32 at element index 1 -> float. No extension is expected, so scvtf
; must consume the register written by the load (REGNUM). The pattern used to
; reference SEXTREG, which is not defined in this function and only held a
; leftover value from an earlier one.
define float @sfct3(i32* nocapture %sp0) {
; CHECK-LABEL: sfct3:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i64 -> f32 is not supported on floating point unit.
define float @sfct4(i64* nocapture %sp0) {
; CHECK-LABEL: sfct4:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; ********* 2s. load with scaled register to float. *********
; Signed i8 at element index %offset -> float. Same split as sfct1 between
; the cyclone and cortex-a57 expectations.
define float @sfct5(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct5:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct5:
; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; Signed i16 at element index %offset -> float. One sshll step is expected.
define float @sfct6(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct6:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; Signed i32 at element index %offset -> float. As in sfct3, scvtf is tied to
; the load's own register (REGNUM) instead of a leftover SEXTREG binding.
define float @sfct7(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct7:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i64 -> f32 is not supported on floating point unit.
define float @sfct8(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct8:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; ********* 3s. load with scaled imm to double. *********
; Signed i8 at element index 1 -> double. The patterns expect a sign-extending
; GPR load (ldrsb) feeding scvtf.
define double @sfct9(i8* nocapture %sp0) {
; CHECK-LABEL: sfct9:
; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Signed i16 at element index 1 -> double. The cyclone patterns expect an
; FP-register load widened by two sshll steps; the cortex-a57 patterns expect
; ldrsh into a GPR.
define double @sfct10(i16* nocapture %sp0) {
; CHECK-LABEL: sfct10:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct10:
; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Signed i32 at element index 1 -> double. One sshll step is expected.
define double @sfct11(i32* nocapture %sp0) {
; CHECK-LABEL: sfct11:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Signed i64 at element index 1 -> double. No extension is expected, so scvtf
; must consume the register written by the load (REGNUM). The pattern used to
; reference SEXTREG, which is not defined in this function and only held a
; leftover value from an earlier one.
define double @sfct12(i64* nocapture %sp0) {
; CHECK-LABEL: sfct12:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; ********* 4s. load with scaled register to double. *********
; Signed i8 at element index %offset -> double, via ldrsb into a GPR.
define double @sfct13(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct13:
; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Signed i16 at element index %offset -> double. Same split as sfct10 between
; the cyclone and cortex-a57 expectations.
define double @sfct14(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct14:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct14:
; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Signed i32 at element index %offset -> double. One sshll step is expected.
define double @sfct15(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct15:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Signed i64 at element index %offset -> double. As in sfct12, scvtf is tied
; to the load's own register (REGNUM) instead of a leftover SEXTREG binding.
define double @sfct16(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct16:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; ********* 5s. load with unscaled imm to float. *********
; Signed i8 at integer address %sp0 - 1 -> float. The cyclone patterns expect
; ldur into an FP register plus two sshll steps; the cortex-a57 patterns
; expect ldursb into a GPR.
define float @sfct17(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct17:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct17:
; CHECK-A57: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; Signed i16 at integer address %sp0 + 1 -> float. One sshll step is expected.
define float @sfct18(i16* nocapture %sp0) {
; CHECK-LABEL: sfct18:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i16* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i16*
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; Signed i32 at integer address %sp0 + 1 -> float. scvtf is tied to the
; load's own register (REGNUM) instead of a leftover SEXTREG binding.
define float @sfct19(i32* nocapture %sp0) {
; CHECK-LABEL: sfct19:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i32* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i32*
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; i64 -> f32 is not supported on floating point unit.
define float @sfct20(i64* nocapture %sp0) {
; CHECK-LABEL: sfct20:
; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; ********* 6s. load with unscaled imm to double. *********
; Signed i8 at integer address %sp0 - 1 -> double, via ldursb into a GPR.
define double @sfct21(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct21:
; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Signed i16 at integer address %sp0 + 1 -> double. The cyclone patterns
; expect ldur into an FP register plus two sshll steps; the cortex-a57
; patterns expect ldursh into a GPR.
define double @sfct22(i16* nocapture %sp0) {
; CHECK-LABEL: sfct22:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
; CHECK-A57-LABEL: sfct22:
; CHECK-A57: ldursh w[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i16* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i16*
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Signed i32 at integer address %sp0 + 1 -> double. One sshll step is expected.
define double @sfct23(i32* nocapture %sp0) {
; CHECK-LABEL: sfct23:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i32* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i32*
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Signed i64 at integer address %sp0 + 1 -> double. No extension is expected,
; so scvtf must consume the register written by the load (REGNUM). The pattern
; used to reference SEXTREG, which is not defined in this function and only
; held a leftover value from an earlier one.
define double @sfct24(i64* nocapture %sp0) {
; CHECK-LABEL: sfct24:
; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Check that we do not use SSHLL code sequence when code size is a concern.
; optsize variant of sfct17: a sign-extending GPR load is expected.
define float @codesize_sfct17(i8* nocapture %sp0) optsize {
entry:
; CHECK-LABEL: codesize_sfct17:
; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i8* %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to i8*
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}

; minsize variant of sfct11: a plain w-register load is expected. The label
; pattern names this function in full; it previously read "sfct11:", a copy of
; the earlier test's label that matched here only as a substring.
define double @codesize_sfct11(i32* nocapture %sp0) minsize {
; CHECK-LABEL: codesize_sfct11:
; CHECK: ldr w[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}

; Adding fp128 custom lowering makes these a little fragile since we have to
; return the correct mix of Legal/Expand from the custom method.
;
; rdar://problem/14991489

; Unsigned i128 -> float is expected to become a call to __floatuntisf.
define float @float_from_i128(i128 %in) {
; CHECK-LABEL: float_from_i128:
; CHECK: bl {{_?__floatuntisf}}
  %conv = uitofp i128 %in to float
  ret float %conv
}

; Signed i128 -> double is expected to become a call to __floattidf.
define double @double_from_i128(i128 %in) {
; CHECK-LABEL: double_from_i128:
; CHECK: bl {{_?__floattidf}}
  %conv = sitofp i128 %in to double
  ret double %conv
}

; Unsigned i128 -> fp128 is expected to become a call to __floatuntitf.
define fp128 @fp128_from_i128(i128 %in) {
; CHECK-LABEL: fp128_from_i128:
; CHECK: bl {{_?__floatuntitf}}
  %conv = uitofp i128 %in to fp128
  ret fp128 %conv
}

; float -> signed i128 is expected to become a call to __fixsfti.
; The label pattern carries the trailing colon like the other tests in this
; file, so it anchors on the function label itself.
define i128 @i128_from_float(float %in) {
; CHECK-LABEL: i128_from_float:
; CHECK: bl {{_?__fixsfti}}
  %conv = fptosi float %in to i128
  ret i128 %conv
}

; double -> unsigned i128 is expected to become a call to __fixunsdfti.
define i128 @i128_from_double(double %in) {
; CHECK-LABEL: i128_from_double:
; CHECK: bl {{_?__fixunsdfti}}
  %conv = fptoui double %in to i128
  ret i128 %conv
}

; fp128 -> signed i128 is expected to become a call to __fixtfti.
define i128 @i128_from_fp128(fp128 %in) {
; CHECK-LABEL: i128_from_fp128:
; CHECK: bl {{_?__fixtfti}}
  %conv = fptosi fp128 %in to i128
  ret i128 %conv
}