Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone -aarch64-neon-syntax=apple | FileCheck -enable-var-scope %s
      2 ; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cortex-a57 | FileCheck -enable-var-scope --check-prefix=CHECK-A57 %s
      3 ; rdar://13082402
      4 
      5 define float @t1(i32* nocapture %src) nounwind ssp {
      6 entry:
      7 ; CHECK-LABEL: t1:
      8 ; CHECK: ldr s0, [x0]
      9 ; CHECK: scvtf s0, s0
     10   %loaded = load i32, i32* %src, align 4  ; i32 read directly through %src
     11   %conv = sitofp i32 %loaded to float  ; signed i32 -> float
     12   ret float %conv
     13 }
     14 
     15 define float @t2(i32* nocapture %src) nounwind ssp {
     16 entry:
     17 ; CHECK-LABEL: t2:
     18 ; CHECK: ldr s0, [x0]
     19 ; CHECK: ucvtf s0, s0
     20   %loaded = load i32, i32* %src, align 4  ; i32 read directly through %src
     21   %conv = uitofp i32 %loaded to float  ; unsigned i32 -> float
     22   ret float %conv
     23 }
     24 
     25 define double @t3(i64* nocapture %src) nounwind ssp {
     26 entry:
     27 ; CHECK-LABEL: t3:
     28 ; CHECK: ldr d0, [x0]
     29 ; CHECK: scvtf d0, d0
     30   %loaded = load i64, i64* %src, align 4  ; i64 read directly through %src
     31   %conv = sitofp i64 %loaded to double  ; signed i64 -> double
     32   ret double %conv
     33 }
     34 
     35 define double @t4(i64* nocapture %src) nounwind ssp {
     36 entry:
     37 ; CHECK-LABEL: t4:
     38 ; CHECK: ldr d0, [x0]
     39 ; CHECK: ucvtf d0, d0
     40   %loaded = load i64, i64* %src, align 4  ; i64 read directly through %src
     41   %conv = uitofp i64 %loaded to double  ; unsigned i64 -> double
     42   ret double %conv
     43 }
     44 
     45 ; rdar://13136456
     46 define double @t5(i32* nocapture %src) nounwind ssp optsize {
     47 entry:
     48 ; CHECK-LABEL: t5:
     49 ; CHECK: ldr [[REG:w[0-9]+]], [x0]
     50 ; CHECK: scvtf d0, [[REG]]
     51   %loaded = load i32, i32* %src, align 4  ; i32 read directly through %src
     52   %conv = sitofp i32 %loaded to double  ; signed i32 -> double, in an optsize function
     53   ret double %conv
     54 }
     55 
     56 ; Check that we load into an FP register when we want to convert to a
     57 ; floating-point value.
     58 ; This is much faster than loading into a GPR and then doing the
     59 ; GPR -> FPR conversion.
     60 ; <rdar://problem/14599607>
     61 ;
     62 ; Check the following patterns for signed/unsigned:
     63 ; 1. load with scaled imm to float.
     64 ; 2. load with scaled register to float.
     65 ; 3. load with scaled imm to double.
     66 ; 4. load with scaled register to double.
     67 ; 5. load with unscaled imm to float.
     68 ; 6. load with unscaled imm to double.
     69 ; With loading size: 8, 16, 32, and 64-bits.
     70 
     71 ; ********* 1. load with scaled imm to float. *********
     72 define float @fct1(i8* nocapture %sp0) {
     73 ; CHECK-LABEL: fct1:
     74 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
     75 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
     76 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
     77 entry:
     78   %elt.ptr = getelementptr i8, i8* %sp0, i64 1  ; element 1 of %sp0
     79   %elt = load i8, i8* %elt.ptr, align 1
     80   %elt.fp = uitofp i8 %elt to float  ; unsigned i8 -> float
     81   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
     82   ret float %sq
     83 }
     84 
     85 define float @fct2(i16* nocapture %sp0) {
     86 ; CHECK-LABEL: fct2:
     87 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
     88 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
     89 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
     90 entry:
     91   %elt.ptr = getelementptr i16, i16* %sp0, i64 1  ; element 1 of %sp0
     92   %elt = load i16, i16* %elt.ptr, align 1
     93   %elt.fp = uitofp i16 %elt to float  ; unsigned i16 -> float
     94   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
     95   ret float %sq
     96 }
     97 
     98 define float @fct3(i32* nocapture %sp0) {
     99 ; CHECK-LABEL: fct3:
    100 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
    101 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
    102 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    103 entry:
    104   %elt.ptr = getelementptr i32, i32* %sp0, i64 1  ; element 1 of %sp0
    105   %elt = load i32, i32* %elt.ptr, align 1
    106   %elt.fp = uitofp i32 %elt to float  ; unsigned i32 -> float
    107   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    108   ret float %sq
    109 }
    110 
    111 ; i64 -> f32 is not supported on floating point unit.
    112 define float @fct4(i64* nocapture %sp0) {
    113 ; CHECK-LABEL: fct4:
    114 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
    115 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
    116 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    117 entry:
    118   %elt.ptr = getelementptr i64, i64* %sp0, i64 1  ; element 1 of %sp0
    119   %elt = load i64, i64* %elt.ptr, align 1
    120   %elt.fp = uitofp i64 %elt to float  ; unsigned i64 -> float; expected load goes to an x register
    121   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    122   ret float %sq
    123 }
    124 
    125 ; ********* 2. load with scaled register to float. *********
    126 define float @fct5(i8* nocapture %sp0, i64 %offset) {
    127 ; CHECK-LABEL: fct5:
    128 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
    129 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
    130 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    131 entry:
    132   %elt.ptr = getelementptr i8, i8* %sp0, i64 %offset  ; element %offset of %sp0
    133   %elt = load i8, i8* %elt.ptr, align 1
    134   %elt.fp = uitofp i8 %elt to float  ; unsigned i8 -> float
    135   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    136   ret float %sq
    137 }
    138 
    139 define float @fct6(i16* nocapture %sp0, i64 %offset) {
    140 ; CHECK-LABEL: fct6:
    141 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
    142 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
    143 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    144 entry:
    145   %elt.ptr = getelementptr i16, i16* %sp0, i64 %offset  ; element %offset of %sp0
    146   %elt = load i16, i16* %elt.ptr, align 1
    147   %elt.fp = uitofp i16 %elt to float  ; unsigned i16 -> float
    148   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    149   ret float %sq
    150 }
    151 
    152 define float @fct7(i32* nocapture %sp0, i64 %offset) {
    153 ; CHECK-LABEL: fct7:
    154 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
    155 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
    156 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    157 entry:
    158   %elt.ptr = getelementptr i32, i32* %sp0, i64 %offset  ; element %offset of %sp0
    159   %elt = load i32, i32* %elt.ptr, align 1
    160   %elt.fp = uitofp i32 %elt to float  ; unsigned i32 -> float
    161   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    162   ret float %sq
    163 }
    164 
    165 ; i64 -> f32 is not supported on floating point unit.
    166 define float @fct8(i64* nocapture %sp0, i64 %offset) {
    167 ; CHECK-LABEL: fct8:
    168 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
    169 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
    170 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    171 entry:
    172   %elt.ptr = getelementptr i64, i64* %sp0, i64 %offset  ; element %offset of %sp0
    173   %elt = load i64, i64* %elt.ptr, align 1
    174   %elt.fp = uitofp i64 %elt to float  ; unsigned i64 -> float; expected load goes to an x register
    175   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    176   ret float %sq
    177 }
    178 
    179 
    180 ; ********* 3. load with scaled imm to double. *********
    181 define double @fct9(i8* nocapture %sp0) {
    182 ; CHECK-LABEL: fct9:
    183 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
    184 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    185 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    186 entry:
    187   %elt.ptr = getelementptr i8, i8* %sp0, i64 1  ; element 1 of %sp0
    188   %elt = load i8, i8* %elt.ptr, align 1
    189   %elt.fp = uitofp i8 %elt to double  ; unsigned i8 -> double
    190   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    191   ret double %sq
    192 }
    193 
    194 define double @fct10(i16* nocapture %sp0) {
    195 ; CHECK-LABEL: fct10:
    196 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
    197 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    198 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    199 entry:
    200   %elt.ptr = getelementptr i16, i16* %sp0, i64 1  ; element 1 of %sp0
    201   %elt = load i16, i16* %elt.ptr, align 1
    202   %elt.fp = uitofp i16 %elt to double  ; unsigned i16 -> double
    203   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    204   ret double %sq
    205 }
    206 
    207 define double @fct11(i32* nocapture %sp0) {
    208 ; CHECK-LABEL: fct11:
    209 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
    210 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    211 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    212 entry:
    213   %elt.ptr = getelementptr i32, i32* %sp0, i64 1  ; element 1 of %sp0
    214   %elt = load i32, i32* %elt.ptr, align 1
    215   %elt.fp = uitofp i32 %elt to double  ; unsigned i32 -> double
    216   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    217   ret double %sq
    218 }
    219 
    220 define double @fct12(i64* nocapture %sp0) {
    221 ; CHECK-LABEL: fct12:
    222 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
    223 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    224 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    225 entry:
    226   %elt.ptr = getelementptr i64, i64* %sp0, i64 1  ; element 1 of %sp0
    227   %elt = load i64, i64* %elt.ptr, align 1
    228   %elt.fp = uitofp i64 %elt to double  ; unsigned i64 -> double
    229   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    230   ret double %sq
    231 }
    232 
    233 ; ********* 4. load with scaled register to double. *********
    234 define double @fct13(i8* nocapture %sp0, i64 %offset) {
    235 ; CHECK-LABEL: fct13:
    236 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
    237 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    238 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    239 entry:
    240   %elt.ptr = getelementptr i8, i8* %sp0, i64 %offset  ; element %offset of %sp0
    241   %elt = load i8, i8* %elt.ptr, align 1
    242   %elt.fp = uitofp i8 %elt to double  ; unsigned i8 -> double
    243   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    244   ret double %sq
    245 }
    246 
    247 define double @fct14(i16* nocapture %sp0, i64 %offset) {
    248 ; CHECK-LABEL: fct14:
    249 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
    250 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    251 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    252 entry:
    253   %elt.ptr = getelementptr i16, i16* %sp0, i64 %offset  ; element %offset of %sp0
    254   %elt = load i16, i16* %elt.ptr, align 1
    255   %elt.fp = uitofp i16 %elt to double  ; unsigned i16 -> double
    256   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    257   ret double %sq
    258 }
    259 
    260 define double @fct15(i32* nocapture %sp0, i64 %offset) {
    261 ; CHECK-LABEL: fct15:
    262 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
    263 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    264 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    265 entry:
    266   %elt.ptr = getelementptr i32, i32* %sp0, i64 %offset  ; element %offset of %sp0
    267   %elt = load i32, i32* %elt.ptr, align 1
    268   %elt.fp = uitofp i32 %elt to double  ; unsigned i32 -> double
    269   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    270   ret double %sq
    271 }
    272 
    273 define double @fct16(i64* nocapture %sp0, i64 %offset) {
    274 ; CHECK-LABEL: fct16:
    275 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
    276 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    277 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    278 entry:
    279   %elt.ptr = getelementptr i64, i64* %sp0, i64 %offset  ; element %offset of %sp0
    280   %elt = load i64, i64* %elt.ptr, align 1
    281   %elt.fp = uitofp i64 %elt to double  ; unsigned i64 -> double
    282   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    283   ret double %sq
    284 }
    285 
    286 ; ********* 5. load with unscaled imm to float. *********
    287 define float @fct17(i8* nocapture %sp0) {
    288 entry:
    289 ; CHECK-LABEL: fct17:
    290 ; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
    291 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
    292 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    293   %base.int = ptrtoint i8* %sp0 to i64  ; address arithmetic is done on the integer value
    294   %addr.int = add i64 %base.int, -1  ; one byte before %sp0
    295   %elt.ptr = inttoptr i64 %addr.int to i8*
    296   %elt = load i8, i8* %elt.ptr, align 1
    297   %elt.fp = uitofp i8 %elt to float  ; unsigned i8 -> float
    298   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    299   ret float %sq
    300 }
    301 
    302 define float @fct18(i16* nocapture %sp0) {
    303 ; CHECK-LABEL: fct18:
    304 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
    305 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
    306 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    307   %base.int = ptrtoint i16* %sp0 to i64  ; address arithmetic is done on the integer value
    308   %addr.int = add i64 %base.int, 1  ; one byte past %sp0
    309   %elt.ptr = inttoptr i64 %addr.int to i16*
    310   %elt = load i16, i16* %elt.ptr, align 1
    311   %elt.fp = uitofp i16 %elt to float  ; unsigned i16 -> float
    312   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    313   ret float %sq
    314 }
    315 
    316 define float @fct19(i32* nocapture %sp0) {
    317 ; CHECK-LABEL: fct19:
    318 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
    319 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
    320 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    321   %base.int = ptrtoint i32* %sp0 to i64  ; address arithmetic is done on the integer value
    322   %addr.int = add i64 %base.int, 1  ; one byte past %sp0
    323   %elt.ptr = inttoptr i64 %addr.int to i32*
    324   %elt = load i32, i32* %elt.ptr, align 1
    325   %elt.fp = uitofp i32 %elt to float  ; unsigned i32 -> float
    326   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    327   ret float %sq
    328 }
    329 
    330 ; i64 -> f32 is not supported on floating point unit.
    331 define float @fct20(i64* nocapture %sp0) {
    332 ; CHECK-LABEL: fct20:
    333 ; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
    334 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
    335 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    336   %base.int = ptrtoint i64* %sp0 to i64  ; address arithmetic is done on the integer value
    337   %addr.int = add i64 %base.int, 1  ; one byte past %sp0
    338   %elt.ptr = inttoptr i64 %addr.int to i64*
    339   %elt = load i64, i64* %elt.ptr, align 1
    340   %elt.fp = uitofp i64 %elt to float  ; unsigned i64 -> float; expected load goes to an x register
    341   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    342   ret float %sq
    343 
    344 }
    345 
    346 ; ********* 6. load with unscaled imm to double. *********
    347 define double @fct21(i8* nocapture %sp0) {
    348 entry:
    349 ; CHECK-LABEL: fct21:
    350 ; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
    351 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    352 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    353   %base.int = ptrtoint i8* %sp0 to i64  ; address arithmetic is done on the integer value
    354   %addr.int = add i64 %base.int, -1  ; one byte before %sp0
    355   %elt.ptr = inttoptr i64 %addr.int to i8*
    356   %elt = load i8, i8* %elt.ptr, align 1
    357   %elt.fp = uitofp i8 %elt to double  ; unsigned i8 -> double
    358   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    359   ret double %sq
    360 }
    361 
    362 define double @fct22(i16* nocapture %sp0) {
    363 ; CHECK-LABEL: fct22:
    364 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
    365 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    366 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    367   %base.int = ptrtoint i16* %sp0 to i64  ; address arithmetic is done on the integer value
    368   %addr.int = add i64 %base.int, 1  ; one byte past %sp0
    369   %elt.ptr = inttoptr i64 %addr.int to i16*
    370   %elt = load i16, i16* %elt.ptr, align 1
    371   %elt.fp = uitofp i16 %elt to double  ; unsigned i16 -> double
    372   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    373   ret double %sq
    374 }
    375 
    376 define double @fct23(i32* nocapture %sp0) {
    377 ; CHECK-LABEL: fct23:
    378 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
    379 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    380 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    381   %base.int = ptrtoint i32* %sp0 to i64  ; address arithmetic is done on the integer value
    382   %addr.int = add i64 %base.int, 1  ; one byte past %sp0
    383   %elt.ptr = inttoptr i64 %addr.int to i32*
    384   %elt = load i32, i32* %elt.ptr, align 1
    385   %elt.fp = uitofp i32 %elt to double  ; unsigned i32 -> double
    386   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    387   ret double %sq
    388 }
    389 
    390 define double @fct24(i64* nocapture %sp0) {
    391 ; CHECK-LABEL: fct24:
    392 ; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
    393 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    394 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    395   %base.int = ptrtoint i64* %sp0 to i64  ; address arithmetic is done on the integer value
    396   %addr.int = add i64 %base.int, 1  ; one byte past %sp0
    397   %elt.ptr = inttoptr i64 %addr.int to i64*
    398   %elt = load i64, i64* %elt.ptr, align 1
    399   %elt.fp = uitofp i64 %elt to double  ; unsigned i64 -> double
    400   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    401   ret double %sq
    402 
    403 }
    404 
    405 ; ********* 1s. load with scaled imm to float. *********
    406 define float @sfct1(i8* nocapture %sp0) {
    407 ; CHECK-LABEL: sfct1:
    408 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
    409 ; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
    410 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
    411 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
    412 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    413 ; CHECK-A57-LABEL: sfct1:
    414 ; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
    415 ; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
    416 ; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
    417 entry:
    418   %elt.ptr = getelementptr i8, i8* %sp0, i64 1  ; element 1 of %sp0
    419   %elt = load i8, i8* %elt.ptr, align 1
    420   %elt.fp = sitofp i8 %elt to float  ; signed i8 -> float
    421   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    422   ret float %sq
    423 }
    424 
    425 define float @sfct2(i16* nocapture %sp0) {
    426 ; CHECK-LABEL: sfct2:
    427 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
    428 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
    429 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
    430 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    431 entry:
    432   %elt.ptr = getelementptr i16, i16* %sp0, i64 1  ; element 1 of %sp0
    433   %elt = load i16, i16* %elt.ptr, align 1
    434   %elt.fp = sitofp i16 %elt to float  ; signed i16 -> float
    435   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    436   ret float %sq
    437 }
    438 
    439 define float @sfct3(i32* nocapture %sp0) {
    440 ; CHECK-LABEL: sfct3:
    441 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
    442 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
    443 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    444 entry:
    445   %elt.ptr = getelementptr i32, i32* %sp0, i64 1  ; element 1 of %sp0
    446   %elt = load i32, i32* %elt.ptr, align 1
    447   %elt.fp = sitofp i32 %elt to float  ; signed i32 -> float
    448   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    449   ret float %sq
    450 }
    451 
    452 ; i64 -> f32 is not supported on floating point unit.
    453 define float @sfct4(i64* nocapture %sp0) {
    454 ; CHECK-LABEL: sfct4:
    455 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
    456 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
    457 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    458 entry:
    459   %elt.ptr = getelementptr i64, i64* %sp0, i64 1  ; element 1 of %sp0
    460   %elt = load i64, i64* %elt.ptr, align 1
    461   %elt.fp = sitofp i64 %elt to float  ; signed i64 -> float; expected load goes to an x register
    462   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    463   ret float %sq
    464 }
    465 
    466 ; ********* 2s. load with scaled register to float. *********
    467 define float @sfct5(i8* nocapture %sp0, i64 %offset) {
    468 ; CHECK-LABEL: sfct5:
    469 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
    470 ; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
    471 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
    472 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
    473 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    474 ; CHECK-A57-LABEL: sfct5:
    475 ; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
    476 ; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
    477 ; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
    478 entry:
    479   %elt.ptr = getelementptr i8, i8* %sp0, i64 %offset  ; element %offset of %sp0
    480   %elt = load i8, i8* %elt.ptr, align 1
    481   %elt.fp = sitofp i8 %elt to float  ; signed i8 -> float
    482   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    483   ret float %sq
    484 }
    485 
    486 define float @sfct6(i16* nocapture %sp0, i64 %offset) {
    487 ; CHECK-LABEL: sfct6:
    488 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
    489 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
    490 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
    491 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    492 entry:
    493   %elt.ptr = getelementptr i16, i16* %sp0, i64 %offset  ; element %offset of %sp0
    494   %elt = load i16, i16* %elt.ptr, align 1
    495   %elt.fp = sitofp i16 %elt to float  ; signed i16 -> float
    496   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    497   ret float %sq
    498 }
    499 
    500 define float @sfct7(i32* nocapture %sp0, i64 %offset) {
    501 ; CHECK-LABEL: sfct7:
    502 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
    503 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
    504 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    505 entry:
    506   %elt.ptr = getelementptr i32, i32* %sp0, i64 %offset  ; element %offset of %sp0
    507   %elt = load i32, i32* %elt.ptr, align 1
    508   %elt.fp = sitofp i32 %elt to float  ; signed i32 -> float
    509   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    510   ret float %sq
    511 }
    512 
    513 ; i64 -> f32 is not supported on floating point unit.
    514 define float @sfct8(i64* nocapture %sp0, i64 %offset) {
    515 ; CHECK-LABEL: sfct8:
    516 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
    517 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
    518 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    519 entry:
    520   %elt.ptr = getelementptr i64, i64* %sp0, i64 %offset  ; element %offset of %sp0
    521   %elt = load i64, i64* %elt.ptr, align 1
    522   %elt.fp = sitofp i64 %elt to float  ; signed i64 -> float; expected load goes to an x register
    523   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    524   ret float %sq
    525 }
    526 
    527 ; ********* 3s. load with scaled imm to double. *********
    528 define double @sfct9(i8* nocapture %sp0) {
    529 ; CHECK-LABEL: sfct9:
    530 ; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
    531 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
    532 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    533 entry:
    534   %elt.ptr = getelementptr i8, i8* %sp0, i64 1  ; element 1 of %sp0
    535   %elt = load i8, i8* %elt.ptr, align 1
    536   %elt.fp = sitofp i8 %elt to double  ; signed i8 -> double; expected load is a sign-extending GPR load
    537   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    538   ret double %sq
    539 }
    540 
    541 define double @sfct10(i16* nocapture %sp0) {
    542 ; CHECK-LABEL: sfct10:
    543 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
    544 ; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
    545 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
    546 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
    547 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    548 ; CHECK-A57-LABEL: sfct10:
    549 ; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, #2]
    550 ; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
    551 ; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
    552 entry:
    553   %elt.ptr = getelementptr i16, i16* %sp0, i64 1  ; element 1 of %sp0
    554   %elt = load i16, i16* %elt.ptr, align 1
    555   %elt.fp = sitofp i16 %elt to double  ; signed i16 -> double
    556   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    557   ret double %sq
    558 }
    559 
    560 define double @sfct11(i32* nocapture %sp0) {
    561 ; CHECK-LABEL: sfct11:
    562 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
    563 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
    564 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
    565 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    566 entry:
    567   %elt.ptr = getelementptr i32, i32* %sp0, i64 1  ; element 1 of %sp0
    568   %elt = load i32, i32* %elt.ptr, align 1
    569   %elt.fp = sitofp i32 %elt to double  ; signed i32 -> double
    570   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    571   ret double %sq
    572 }
    573 
    574 define double @sfct12(i64* nocapture %sp0) {
    575 ; CHECK-LABEL: sfct12:
    576 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
    577 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
    578 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    579 entry:
    580   %elt.ptr = getelementptr i64, i64* %sp0, i64 1  ; element 1 of %sp0
    581   %elt = load i64, i64* %elt.ptr, align 1
    582   %elt.fp = sitofp i64 %elt to double  ; signed i64 -> double
    583   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    584   ret double %sq
    585 }
    586 
    587 ; ********* 4s. load with scaled register to double. *********
    588 define double @sfct13(i8* nocapture %sp0, i64 %offset) {
    589 ; CHECK-LABEL: sfct13:
    590 ; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
    591 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
    592 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    593 entry:
    594   %elt.ptr = getelementptr i8, i8* %sp0, i64 %offset  ; element %offset of %sp0
    595   %elt = load i8, i8* %elt.ptr, align 1
    596   %elt.fp = sitofp i8 %elt to double  ; signed i8 -> double; expected load is a sign-extending GPR load
    597   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    598   ret double %sq
    599 }
    600 
    601 define double @sfct14(i16* nocapture %sp0, i64 %offset) {
    602 ; CHECK-LABEL: sfct14:
    603 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
    604 ; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
    605 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
    606 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
    607 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    608 ; CHECK-A57-LABEL: sfct14:
    609 ; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
    610 ; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
    611 ; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
    612 entry:
    613   %elt.ptr = getelementptr i16, i16* %sp0, i64 %offset  ; element %offset of %sp0
    614   %elt = load i16, i16* %elt.ptr, align 1
    615   %elt.fp = sitofp i16 %elt to double  ; signed i16 -> double
    616   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    617   ret double %sq
    618 }
    619 
    620 define double @sfct15(i32* nocapture %sp0, i64 %offset) {
    621 ; CHECK-LABEL: sfct15:
    622 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
    623 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
    624 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
    625 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    626 entry:
    627   %elt.ptr = getelementptr i32, i32* %sp0, i64 %offset  ; element %offset of %sp0
    628   %elt = load i32, i32* %elt.ptr, align 1
    629   %elt.fp = sitofp i32 %elt to double  ; signed i32 -> double
    630   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    631   ret double %sq
    632 }
    633 
    634 define double @sfct16(i64* nocapture %sp0, i64 %offset) {
    635 ; CHECK-LABEL: sfct16:
    636 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
    637 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
    638 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    639 entry:
    640   %elt.ptr = getelementptr i64, i64* %sp0, i64 %offset  ; element %offset of %sp0
    641   %elt = load i64, i64* %elt.ptr, align 1
    642   %elt.fp = sitofp i64 %elt to double  ; signed i64 -> double
    643   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    644   ret double %sq
    645 }
    646 
    647 ; ********* 5s. load with unscaled imm to float. *********
    648 define float @sfct17(i8* nocapture %sp0) {
    649 entry:
    650 ; CHECK-LABEL: sfct17:
    651 ; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
    652 ; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
    653 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
    654 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
    655 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    656 ; CHECK-A57-LABEL: sfct17:
    657 ; CHECK-A57: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
    658 ; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
    659 ; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
    660   %base.int = ptrtoint i8* %sp0 to i64  ; address arithmetic is done on the integer value
    661   %addr.int = add i64 %base.int, -1  ; one byte before %sp0
    662   %elt.ptr = inttoptr i64 %addr.int to i8*
    663   %elt = load i8, i8* %elt.ptr, align 1
    664   %elt.fp = sitofp i8 %elt to float  ; signed i8 -> float
    665   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    666   ret float %sq
    667 }
    668 
    669 define float @sfct18(i16* nocapture %sp0) {
    670 ; CHECK-LABEL: sfct18:
    671 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
    672 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
    673 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
    674 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    675   %base.int = ptrtoint i16* %sp0 to i64  ; address arithmetic is done on the integer value
    676   %addr.int = add i64 %base.int, 1  ; one byte past %sp0
    677   %elt.ptr = inttoptr i64 %addr.int to i16*
    678   %elt = load i16, i16* %elt.ptr, align 1
    679   %elt.fp = sitofp i16 %elt to float  ; signed i16 -> float
    680   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    681   ret float %sq
    682 }
    683 
    684 define float @sfct19(i32* nocapture %sp0) {
    685 ; CHECK-LABEL: sfct19:
    686 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
    687 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
    688 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    689   %base.int = ptrtoint i32* %sp0 to i64  ; address arithmetic is done on the integer value
    690   %addr.int = add i64 %base.int, 1  ; one byte past %sp0
    691   %elt.ptr = inttoptr i64 %addr.int to i32*
    692   %elt = load i32, i32* %elt.ptr, align 1
    693   %elt.fp = sitofp i32 %elt to float  ; signed i32 -> float
    694   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    695   ret float %sq
    696 }
    697 
    698 ; i64 -> f32 is not supported on floating point unit.
    699 define float @sfct20(i64* nocapture %sp0) {
    700 ; CHECK-LABEL: sfct20:
    701 ; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
    702 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
    703 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    704   %base.int = ptrtoint i64* %sp0 to i64  ; address arithmetic is done on the integer value
    705   %addr.int = add i64 %base.int, 1  ; one byte past %sp0
    706   %elt.ptr = inttoptr i64 %addr.int to i64*
    707   %elt = load i64, i64* %elt.ptr, align 1
    708   %elt.fp = sitofp i64 %elt to float  ; signed i64 -> float; expected load goes to an x register
    709   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    710   ret float %sq
    711 
    712 }
    713 
    714 ; ********* 6s. load with unscaled imm to double. *********
    715 define double @sfct21(i8* nocapture %sp0) {
    716 entry:
    717 ; CHECK-LABEL: sfct21:
    718 ; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
    719 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
    720 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    721   %base.int = ptrtoint i8* %sp0 to i64  ; address arithmetic is done on the integer value
    722   %addr.int = add i64 %base.int, -1  ; one byte before %sp0
    723   %elt.ptr = inttoptr i64 %addr.int to i8*
    724   %elt = load i8, i8* %elt.ptr, align 1
    725   %elt.fp = sitofp i8 %elt to double  ; signed i8 -> double; expected load is a sign-extending GPR load
    726   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    727   ret double %sq
    728 }
    729 
    730 define double @sfct22(i16* nocapture %sp0) {
    731 ; CHECK-LABEL: sfct22:
    732 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
    733 ; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
    734 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
    735 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
    736 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    737 ; CHECK-A57-LABEL: sfct22:
    738 ; CHECK-A57: ldursh w[[REGNUM:[0-9]+]], [x0, #1]
    739 ; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
    740 ; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
    741   %base.int = ptrtoint i16* %sp0 to i64  ; address arithmetic is done on the integer value
    742   %addr.int = add i64 %base.int, 1  ; one byte past %sp0
    743   %elt.ptr = inttoptr i64 %addr.int to i16*
    744   %elt = load i16, i16* %elt.ptr, align 1
    745   %elt.fp = sitofp i16 %elt to double  ; signed i16 -> double
    746   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    747   ret double %sq
    748 }
    749 
    750 define double @sfct23(i32* nocapture %sp0) {
    751 ; CHECK-LABEL: sfct23:
    752 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
    753 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
    754 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
    755 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    756   %base.int = ptrtoint i32* %sp0 to i64  ; address arithmetic is done on the integer value
    757   %addr.int = add i64 %base.int, 1  ; one byte past %sp0
    758   %elt.ptr = inttoptr i64 %addr.int to i32*
    759   %elt = load i32, i32* %elt.ptr, align 1
    760   %elt.fp = sitofp i32 %elt to double  ; signed i32 -> double
    761   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    762   ret double %sq
    763 }
    764 
    765 define double @sfct24(i64* nocapture %sp0) {
    766 ; CHECK-LABEL: sfct24:
    767 ; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
    768 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
    769 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    770   %base.int = ptrtoint i64* %sp0 to i64  ; address arithmetic is done on the integer value
    771   %addr.int = add i64 %base.int, 1  ; one byte past %sp0
    772   %elt.ptr = inttoptr i64 %addr.int to i64*
    773   %elt = load i64, i64* %elt.ptr, align 1
    774   %elt.fp = sitofp i64 %elt to double  ; signed i64 -> double
    775   %sq = fmul double %elt.fp, %elt.fp  ; converted value times itself
    776   ret double %sq
    777 
    778 }
    779 
    780 ; Check that we do not use SSHLL code sequence when code size is a concern.
    781 define float @codesize_sfct17(i8* nocapture %sp0) optsize {
    782 entry:
    783 ; CHECK-LABEL: codesize_sfct17:
    784 ; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
    785 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
    786 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    787   %base.int = ptrtoint i8* %sp0 to i64  ; address arithmetic is done on the integer value
    788   %addr.int = add i64 %base.int, -1  ; one byte before %sp0
    789   %elt.ptr = inttoptr i64 %addr.int to i8*
    790   %elt = load i8, i8* %elt.ptr, align 1
    791   %elt.fp = sitofp i8 %elt to float  ; signed i8 -> float, same IR as sfct17 but under optsize
    792   %sq = fmul float %elt.fp, %elt.fp  ; converted value times itself
    793   ret float %sq
    794 }
    795 
    796 define double @codesize_sfct11(i32* nocapture %sp0) minsize {  ; same IR as sfct11, but marked minsize
    797 ; CHECK-LABEL: codesize_sfct11:
    798 ; CHECK: ldr w[[REGNUM:[0-9]+]], [x0, #4]
    799 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
    800 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    801 entry:
    802   %addr = getelementptr i32, i32* %sp0, i64 1  ; element 1 of %sp0
    803   %pix_sp0.0.copyload = load i32, i32* %addr, align 1
    804   %val = sitofp i32 %pix_sp0.0.copyload to double  ; signed i32 -> double; expected to convert from a w register
    805   %vmull.i = fmul double %val, %val  ; converted value times itself
    806   ret double %vmull.i
    807 }
    808 
    809 ; Adding fp128 custom lowering makes these a little fragile since we have to
    810 ; return the correct mix of Legal/Expand from the custom method.
    811 ;
    812 ; rdar://problem/14991489
    813 
    814 define float @float_from_i128(i128 %in) {
    815 ; CHECK-LABEL: float_from_i128:
    816 ; CHECK: bl {{_?__floatuntisf}}
    817   %res = uitofp i128 %in to float  ; unsigned i128 -> float; expected to become a runtime library call
    818   ret float %res
    819 }
    820 
    821 define double @double_from_i128(i128 %in) {
    822 ; CHECK-LABEL: double_from_i128:
    823 ; CHECK: bl {{_?__floattidf}}
    824   %res = sitofp i128 %in to double  ; signed i128 -> double; expected to become a runtime library call
    825   ret double %res
    826 }
    827 
    828 define fp128 @fp128_from_i128(i128 %in) {
    829 ; CHECK-LABEL: fp128_from_i128:
    830 ; CHECK: bl {{_?__floatuntitf}}
    831   %res = uitofp i128 %in to fp128  ; unsigned i128 -> fp128; expected to become a runtime library call
    832   ret fp128 %res
    833 }
    834 
    835 define i128 @i128_from_float(float %in) {
    836 ; CHECK-LABEL: i128_from_float:
    837 ; CHECK: bl {{_?__fixsfti}}
    838   %conv = fptosi float %in to i128  ; float -> signed i128; expected to become a runtime library call
    839   ret i128 %conv
    840 }
    841 
    842 define i128 @i128_from_double(double %in) {
    843 ; CHECK-LABEL: i128_from_double:
    844 ; CHECK: bl {{_?__fixunsdfti}}
    845   %conv = fptoui double %in to i128  ; double -> unsigned i128; expected to become a runtime library call
    846   ret i128 %conv
    847 }
    848 
    849 define i128 @i128_from_fp128(fp128 %in) {
    850 ; CHECK-LABEL: i128_from_fp128:
    851 ; CHECK: bl {{_?__fixtfti}}
    852   %conv = fptosi fp128 %in to i128  ; fp128 -> signed i128; expected to become a runtime library call
    853   ret i128 %conv
    854 }
    855 
    856