Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
      2 ; rdar://13082402
      3 
      4 define float @t1(i32* nocapture %src) nounwind ssp { ; i32 load + signed convert to float, both expected in s0
      5 entry:
      6 ; CHECK-LABEL: t1:
      7 ; CHECK: ldr s0, [x0]
      8 ; CHECK: scvtf s0, s0
      9   %word = load i32* %src, align 4
     10   %res = sitofp i32 %word to float
     11   ret float %res
     12 }
     13 
     14 define float @t2(i32* nocapture %src) nounwind ssp { ; i32 load + unsigned convert to float, both expected in s0
     15 entry:
     16 ; CHECK-LABEL: t2:
     17 ; CHECK: ldr s0, [x0]
     18 ; CHECK: ucvtf s0, s0
     19   %word = load i32* %src, align 4
     20   %res = uitofp i32 %word to float
     21   ret float %res
     22 }
     23 
     24 define double @t3(i64* nocapture %src) nounwind ssp { ; i64 load + signed convert to double, both expected in d0
     25 entry:
     26 ; CHECK-LABEL: t3:
     27 ; CHECK: ldr d0, [x0]
     28 ; CHECK: scvtf d0, d0
     29   %dword = load i64* %src, align 4
     30   %res = sitofp i64 %dword to double
     31   ret double %res
     32 }
     33 
     34 define double @t4(i64* nocapture %src) nounwind ssp { ; i64 load + unsigned convert to double, both expected in d0
     35 entry:
     36 ; CHECK-LABEL: t4:
     37 ; CHECK: ldr d0, [x0]
     38 ; CHECK: ucvtf d0, d0
     39   %dword = load i64* %src, align 4
     40   %res = uitofp i64 %dword to double
     41   ret double %res
     42 }
     43 
     44 ; rdar://13136456
     45 define double @t5(i32* nocapture %src) nounwind ssp optsize { ; optsize: signed i32 -> double is expected to load into a w register
     46 entry:
     47 ; CHECK-LABEL: t5:
     48 ; CHECK: ldr [[REG:w[0-9]+]], [x0]
     49 ; CHECK: scvtf d0, [[REG]]
     50   %word = load i32* %src, align 4
     51   %res = sitofp i32 %word to double
     52   ret double %res
     53 }
     54 
     55 ; Check that we load in FP register when we want to convert into
     56 ; floating point value.
     57 ; This is much faster than loading on GPR and making the conversion
     58 ; GPR -> FPR.
     59 ; <rdar://problem/14599607>
     60 ;
     61 ; Check the following patterns for signed/unsigned:
     62 ; 1. load with scaled imm to float.
     63 ; 2. load with scaled register to float.
     64 ; 3. load with scaled imm to double.
     65 ; 4. load with scaled register to double.
     66 ; 5. load with unscaled imm to float.
     67 ; 6. load with unscaled imm to double.
     68 ; With loading size: 8, 16, 32, and 64-bits.
     69 
     70 ; ********* 1. load with scaled imm to float. *********
     70 define float @fct1(i8* nocapture %sp0) { ; unsigned i8 at element 1 -> float, then squared
     71 ; CHECK-LABEL: fct1:
     72 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
     73 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
     74 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
     75 entry:
     76   %ptr = getelementptr i8* %sp0, i64 1
     77   %raw = load i8* %ptr, align 1
     78   %fp = uitofp i8 %raw to float
     79   %sq = fmul float %fp, %fp
     80   ret float %sq
     81 }
     83 
     83 define float @fct2(i16* nocapture %sp0) { ; unsigned i16 at element 1 -> float, then squared
     84 ; CHECK-LABEL: fct2:
     85 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
     86 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
     87 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
     88 entry:
     89   %ptr = getelementptr i16* %sp0, i64 1
     90   %raw = load i16* %ptr, align 1
     91   %fp = uitofp i16 %raw to float
     92   %sq = fmul float %fp, %fp
     93   ret float %sq
     94 }
     96 
     96 define float @fct3(i32* nocapture %sp0) { ; unsigned i32 at element 1 -> float, then squared
     97 ; CHECK-LABEL: fct3:
     98 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
     99 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
    100 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    101 entry:
    102   %ptr = getelementptr i32* %sp0, i64 1
    103   %raw = load i32* %ptr, align 1
    104   %fp = uitofp i32 %raw to float
    105   %sq = fmul float %fp, %fp
    106   ret float %sq
    107 }
    109 
    110 ; i64 -> f32 is not supported on floating point unit.
    110 define float @fct4(i64* nocapture %sp0) { ; unsigned i64 at element 1 -> float; load is expected in an x register
    111 ; CHECK-LABEL: fct4:
    112 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
    113 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
    114 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    115 entry:
    116   %ptr = getelementptr i64* %sp0, i64 1
    117   %raw = load i64* %ptr, align 1
    118   %fp = uitofp i64 %raw to float
    119   %sq = fmul float %fp, %fp
    120   ret float %sq
    121 }
    123 
    124 ; ********* 2. load with scaled register to float. *********
    124 define float @fct5(i8* nocapture %sp0, i64 %offset) { ; unsigned i8 at a register index -> float, then squared
    125 ; CHECK-LABEL: fct5:
    126 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
    127 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
    128 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    129 entry:
    130   %ptr = getelementptr i8* %sp0, i64 %offset
    131   %raw = load i8* %ptr, align 1
    132   %fp = uitofp i8 %raw to float
    133   %sq = fmul float %fp, %fp
    134   ret float %sq
    135 }
    137 
    137 define float @fct6(i16* nocapture %sp0, i64 %offset) { ; unsigned i16 at a register index -> float, then squared
    138 ; CHECK-LABEL: fct6:
    139 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
    140 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
    141 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    142 entry:
    143   %ptr = getelementptr i16* %sp0, i64 %offset
    144   %raw = load i16* %ptr, align 1
    145   %fp = uitofp i16 %raw to float
    146   %sq = fmul float %fp, %fp
    147   ret float %sq
    148 }
    150 
    150 define float @fct7(i32* nocapture %sp0, i64 %offset) { ; unsigned i32 at a register index -> float, then squared
    151 ; CHECK-LABEL: fct7:
    152 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
    153 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
    154 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    155 entry:
    156   %ptr = getelementptr i32* %sp0, i64 %offset
    157   %raw = load i32* %ptr, align 1
    158   %fp = uitofp i32 %raw to float
    159   %sq = fmul float %fp, %fp
    160   ret float %sq
    161 }
    163 
    164 ; i64 -> f32 is not supported on floating point unit.
    164 define float @fct8(i64* nocapture %sp0, i64 %offset) { ; unsigned i64 at a register index -> float; load is expected in an x register
    165 ; CHECK-LABEL: fct8:
    166 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
    167 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
    168 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    169 entry:
    170   %ptr = getelementptr i64* %sp0, i64 %offset
    171   %raw = load i64* %ptr, align 1
    172   %fp = uitofp i64 %raw to float
    173   %sq = fmul float %fp, %fp
    174   ret float %sq
    175 }
    177 
    178 
    179 ; ********* 3. load with scaled imm to double. *********
    179 define double @fct9(i8* nocapture %sp0) { ; unsigned i8 at element 1 -> double, then squared
    180 ; CHECK-LABEL: fct9:
    181 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
    182 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    183 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    184 entry:
    185   %ptr = getelementptr i8* %sp0, i64 1
    186   %raw = load i8* %ptr, align 1
    187   %fp = uitofp i8 %raw to double
    188   %sq = fmul double %fp, %fp
    189   ret double %sq
    190 }
    192 
    192 define double @fct10(i16* nocapture %sp0) { ; unsigned i16 at element 1 -> double, then squared
    193 ; CHECK-LABEL: fct10:
    194 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
    195 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    196 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    197 entry:
    198   %ptr = getelementptr i16* %sp0, i64 1
    199   %raw = load i16* %ptr, align 1
    200   %fp = uitofp i16 %raw to double
    201   %sq = fmul double %fp, %fp
    202   ret double %sq
    203 }
    205 
    205 define double @fct11(i32* nocapture %sp0) { ; unsigned i32 at element 1 -> double, then squared
    206 ; CHECK-LABEL: fct11:
    207 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
    208 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    209 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    210 entry:
    211   %ptr = getelementptr i32* %sp0, i64 1
    212   %raw = load i32* %ptr, align 1
    213   %fp = uitofp i32 %raw to double
    214   %sq = fmul double %fp, %fp
    215   ret double %sq
    216 }
    218 
    218 define double @fct12(i64* nocapture %sp0) { ; unsigned i64 at element 1 -> double, then squared
    219 ; CHECK-LABEL: fct12:
    220 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
    221 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    222 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    223 entry:
    224   %ptr = getelementptr i64* %sp0, i64 1
    225   %raw = load i64* %ptr, align 1
    226   %fp = uitofp i64 %raw to double
    227   %sq = fmul double %fp, %fp
    228   ret double %sq
    229 }
    231 
    232 ; ********* 4. load with scaled register to double. *********
    232 define double @fct13(i8* nocapture %sp0, i64 %offset) { ; unsigned i8 at a register index -> double, then squared
    233 ; CHECK-LABEL: fct13:
    234 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
    235 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    236 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    237 entry:
    238   %ptr = getelementptr i8* %sp0, i64 %offset
    239   %raw = load i8* %ptr, align 1
    240   %fp = uitofp i8 %raw to double
    241   %sq = fmul double %fp, %fp
    242   ret double %sq
    243 }
    245 
    245 define double @fct14(i16* nocapture %sp0, i64 %offset) { ; unsigned i16 at a register index -> double, then squared
    246 ; CHECK-LABEL: fct14:
    247 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
    248 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    249 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    250 entry:
    251   %ptr = getelementptr i16* %sp0, i64 %offset
    252   %raw = load i16* %ptr, align 1
    253   %fp = uitofp i16 %raw to double
    254   %sq = fmul double %fp, %fp
    255   ret double %sq
    256 }
    258 
    258 define double @fct15(i32* nocapture %sp0, i64 %offset) { ; unsigned i32 at a register index -> double, then squared
    259 ; CHECK-LABEL: fct15:
    260 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
    261 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    262 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    263 entry:
    264   %ptr = getelementptr i32* %sp0, i64 %offset
    265   %raw = load i32* %ptr, align 1
    266   %fp = uitofp i32 %raw to double
    267   %sq = fmul double %fp, %fp
    268   ret double %sq
    269 }
    271 
    271 define double @fct16(i64* nocapture %sp0, i64 %offset) { ; unsigned i64 at a register index -> double, then squared
    272 ; CHECK-LABEL: fct16:
    273 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
    274 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    275 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    276 entry:
    277   %ptr = getelementptr i64* %sp0, i64 %offset
    278   %raw = load i64* %ptr, align 1
    279   %fp = uitofp i64 %raw to double
    280   %sq = fmul double %fp, %fp
    281   ret double %sq
    282 }
    284 
    285 ; ********* 5. load with unscaled imm to float. *********
    285 define float @fct17(i8* nocapture %sp0) { ; unsigned i8 at byte offset -1 -> float, then squared
    286 entry:
    287 ; CHECK-LABEL: fct17:
    288 ; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
    289 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
    290 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    291   %base = ptrtoint i8* %sp0 to i64
    292   %moved = add i64 %base, -1
    293   %ptr = inttoptr i64 %moved to i8*
    294   %raw = load i8* %ptr, align 1
    295   %fp = uitofp i8 %raw to float
    296   %sq = fmul float %fp, %fp
    297   ret float %sq
    298 }
    300 
    300 define float @fct18(i16* nocapture %sp0) { ; unsigned i16 at byte offset +1 -> float, then squared
    301 ; CHECK-LABEL: fct18:
    302 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
    303 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
    304 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    305   %base = ptrtoint i16* %sp0 to i64
    306   %moved = add i64 %base, 1
    307   %ptr = inttoptr i64 %moved to i16*
    308   %raw = load i16* %ptr, align 1
    309   %fp = uitofp i16 %raw to float
    310   %sq = fmul float %fp, %fp
    311   ret float %sq
    312 }
    314 
    314 define float @fct19(i32* nocapture %sp0) { ; unsigned i32 at byte offset +1 -> float, then squared
    315 ; CHECK-LABEL: fct19:
    316 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
    317 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
    318 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    319   %base = ptrtoint i32* %sp0 to i64
    320   %moved = add i64 %base, 1
    321   %ptr = inttoptr i64 %moved to i32*
    322   %raw = load i32* %ptr, align 1
    323   %fp = uitofp i32 %raw to float
    324   %sq = fmul float %fp, %fp
    325   ret float %sq
    326 }
    328 
    329 ; i64 -> f32 is not supported on floating point unit.
    329 define float @fct20(i64* nocapture %sp0) { ; unsigned i64 at byte offset +1 -> float; load is expected in an x register
    330 ; CHECK-LABEL: fct20:
    331 ; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
    332 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
    333 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    334   %base = ptrtoint i64* %sp0 to i64
    335   %moved = add i64 %base, 1
    336   %ptr = inttoptr i64 %moved to i64*
    337   %raw = load i64* %ptr, align 1
    338   %fp = uitofp i64 %raw to float
    339   %sq = fmul float %fp, %fp
    340   ret float %sq
    341 
    342 }
    344 
    345 ; ********* 6. load with unscaled imm to double. *********
    345 define double @fct21(i8* nocapture %sp0) { ; unsigned i8 at byte offset -1 -> double, then squared
    346 entry:
    347 ; CHECK-LABEL: fct21:
    348 ; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
    349 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    350 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    351   %base = ptrtoint i8* %sp0 to i64
    352   %moved = add i64 %base, -1
    353   %ptr = inttoptr i64 %moved to i8*
    354   %raw = load i8* %ptr, align 1
    355   %fp = uitofp i8 %raw to double
    356   %sq = fmul double %fp, %fp
    357   ret double %sq
    358 }
    360 
    360 define double @fct22(i16* nocapture %sp0) { ; unsigned i16 at byte offset +1 -> double, then squared
    361 ; CHECK-LABEL: fct22:
    362 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
    363 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    364 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    365   %base = ptrtoint i16* %sp0 to i64
    366   %moved = add i64 %base, 1
    367   %ptr = inttoptr i64 %moved to i16*
    368   %raw = load i16* %ptr, align 1
    369   %fp = uitofp i16 %raw to double
    370   %sq = fmul double %fp, %fp
    371   ret double %sq
    372 }
    374 
    374 define double @fct23(i32* nocapture %sp0) { ; unsigned i32 at byte offset +1 -> double, then squared
    375 ; CHECK-LABEL: fct23:
    376 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
    377 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    378 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    379   %base = ptrtoint i32* %sp0 to i64
    380   %moved = add i64 %base, 1
    381   %ptr = inttoptr i64 %moved to i32*
    382   %raw = load i32* %ptr, align 1
    383   %fp = uitofp i32 %raw to double
    384   %sq = fmul double %fp, %fp
    385   ret double %sq
    386 }
    388 
    388 define double @fct24(i64* nocapture %sp0) { ; unsigned i64 at byte offset +1 -> double, then squared
    389 ; CHECK-LABEL: fct24:
    390 ; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
    391 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
    392 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    393   %base = ptrtoint i64* %sp0 to i64
    394   %moved = add i64 %base, 1
    395   %ptr = inttoptr i64 %moved to i64*
    396   %raw = load i64* %ptr, align 1
    397   %fp = uitofp i64 %raw to double
    398   %sq = fmul double %fp, %fp
    399   ret double %sq
    400 
    401 }
    403 
    404 ; ********* 1s. load with scaled imm to float. *********
    404 define float @sfct1(i8* nocapture %sp0) { ; signed i8 at element 1 -> float; two sshll widenings expected before scvtf
    405 ; CHECK-LABEL: sfct1:
    406 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
    407 ; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
    408 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
    409 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
    410 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    411 entry:
    412   %ptr = getelementptr i8* %sp0, i64 1
    413   %raw = load i8* %ptr, align 1
    414   %fp = sitofp i8 %raw to float
    415   %sq = fmul float %fp, %fp
    416   ret float %sq
    417 }
    419 
    419 define float @sfct2(i16* nocapture %sp0) { ; signed i16 at element 1 -> float; one sshll widening expected before scvtf
    420 ; CHECK-LABEL: sfct2:
    421 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
    422 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
    423 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
    424 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    425 entry:
    426   %ptr = getelementptr i16* %sp0, i64 1
    427   %raw = load i16* %ptr, align 1
    428   %fp = sitofp i16 %raw to float
    429   %sq = fmul float %fp, %fp
    430   ret float %sq
    431 }
    433 
    433 define float @sfct3(i32* nocapture %sp0) { ; signed i32 at element 1 -> float; no widening, scvtf reads the register the load defined
    434 ; CHECK-LABEL: sfct3:
    435 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
    436 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
    437 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    438 entry:
    439   %addr = getelementptr i32* %sp0, i64 1
    440   %pix_sp0.0.copyload = load i32* %addr, align 1
    441   %val = sitofp i32 %pix_sp0.0.copyload to float
    442   %vmull.i = fmul float %val, %val  ; square the converted value
    443   ret float %vmull.i
    444 }
    446 
    447 ; i64 -> f32 is not supported on floating point unit.
    447 define float @sfct4(i64* nocapture %sp0) { ; signed i64 at element 1 -> float; load is expected in an x register
    448 ; CHECK-LABEL: sfct4:
    449 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
    450 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
    451 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    452 entry:
    453   %ptr = getelementptr i64* %sp0, i64 1
    454   %raw = load i64* %ptr, align 1
    455   %fp = sitofp i64 %raw to float
    456   %sq = fmul float %fp, %fp
    457   ret float %sq
    458 }
    460 
    461 ; ********* 2s. load with scaled register to float. *********
    461 define float @sfct5(i8* nocapture %sp0, i64 %offset) { ; signed i8 at a register index -> float; two sshll widenings expected
    462 ; CHECK-LABEL: sfct5:
    463 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
    464 ; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
    465 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
    466 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
    467 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    468 entry:
    469   %ptr = getelementptr i8* %sp0, i64 %offset
    470   %raw = load i8* %ptr, align 1
    471   %fp = sitofp i8 %raw to float
    472   %sq = fmul float %fp, %fp
    473   ret float %sq
    474 }
    476 
    476 define float @sfct6(i16* nocapture %sp0, i64 %offset) { ; signed i16 at a register index -> float; one sshll widening expected
    477 ; CHECK-LABEL: sfct6:
    478 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
    479 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
    480 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
    481 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    482 entry:
    483   %ptr = getelementptr i16* %sp0, i64 %offset
    484   %raw = load i16* %ptr, align 1
    485   %fp = sitofp i16 %raw to float
    486   %sq = fmul float %fp, %fp
    487   ret float %sq
    488 }
    490 
    490 define float @sfct7(i32* nocapture %sp0, i64 %offset) { ; signed i32 at a register index -> float; no widening, scvtf reads the register the load defined
    491 ; CHECK-LABEL: sfct7:
    492 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
    493 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
    494 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    495 entry:
    496   %addr = getelementptr i32* %sp0, i64 %offset
    497   %pix_sp0.0.copyload = load i32* %addr, align 1
    498   %val = sitofp i32 %pix_sp0.0.copyload to float
    499   %vmull.i = fmul float %val, %val  ; square the converted value
    500   ret float %vmull.i
    501 }
    503 
    504 ; i64 -> f32 is not supported on floating point unit.
    504 define float @sfct8(i64* nocapture %sp0, i64 %offset) { ; signed i64 at a register index -> float; load is expected in an x register
    505 ; CHECK-LABEL: sfct8:
    506 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
    507 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
    508 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    509 entry:
    510   %ptr = getelementptr i64* %sp0, i64 %offset
    511   %raw = load i64* %ptr, align 1
    512   %fp = sitofp i64 %raw to float
    513   %sq = fmul float %fp, %fp
    514   ret float %sq
    515 }
    517 
    518 ; ********* 3s. load with scaled imm to double. *********
    518 define double @sfct9(i8* nocapture %sp0) { ; signed i8 at element 1 -> double; a sign-extending load into a w register is expected
    519 ; CHECK-LABEL: sfct9:
    520 ; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
    521 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
    522 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    523 entry:
    524   %ptr = getelementptr i8* %sp0, i64 1
    525   %raw = load i8* %ptr, align 1
    526   %fp = sitofp i8 %raw to double
    527   %sq = fmul double %fp, %fp
    528   ret double %sq
    529 }
    531 
    531 define double @sfct10(i16* nocapture %sp0) { ; signed i16 at element 1 -> double; two sshll widenings expected before scvtf
    532 ; CHECK-LABEL: sfct10:
    533 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
    534 ; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
    535 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
    536 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
    537 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    538 entry:
    539   %ptr = getelementptr i16* %sp0, i64 1
    540   %raw = load i16* %ptr, align 1
    541   %fp = sitofp i16 %raw to double
    542   %sq = fmul double %fp, %fp
    543   ret double %sq
    544 }
    546 
    546 define double @sfct11(i32* nocapture %sp0) { ; signed i32 at element 1 -> double; one sshll widening expected before scvtf
    547 ; CHECK-LABEL: sfct11:
    548 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
    549 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
    550 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
    551 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    552 entry:
    553   %ptr = getelementptr i32* %sp0, i64 1
    554   %raw = load i32* %ptr, align 1
    555   %fp = sitofp i32 %raw to double
    556   %sq = fmul double %fp, %fp
    557   ret double %sq
    558 }
    560 
    560 define double @sfct12(i64* nocapture %sp0) { ; signed i64 at element 1 -> double; no widening, scvtf reads the register the load defined
    561 ; CHECK-LABEL: sfct12:
    562 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
    563 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
    564 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    565 entry:
    566   %addr = getelementptr i64* %sp0, i64 1
    567   %pix_sp0.0.copyload = load i64* %addr, align 1
    568   %val = sitofp i64 %pix_sp0.0.copyload to double
    569   %vmull.i = fmul double %val, %val  ; square the converted value
    570   ret double %vmull.i
    571 }
    573 
    574 ; ********* 4s. load with scaled register to double. *********
    574 define double @sfct13(i8* nocapture %sp0, i64 %offset) { ; signed i8 at a register index -> double; sign-extending load into a w register expected
    575 ; CHECK-LABEL: sfct13:
    576 ; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
    577 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
    578 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    579 entry:
    580   %ptr = getelementptr i8* %sp0, i64 %offset
    581   %raw = load i8* %ptr, align 1
    582   %fp = sitofp i8 %raw to double
    583   %sq = fmul double %fp, %fp
    584   ret double %sq
    585 }
    587 
    587 define double @sfct14(i16* nocapture %sp0, i64 %offset) { ; signed i16 at a register index -> double; two sshll widenings expected
    588 ; CHECK-LABEL: sfct14:
    589 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
    590 ; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
    591 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
    592 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
    593 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    594 entry:
    595   %ptr = getelementptr i16* %sp0, i64 %offset
    596   %raw = load i16* %ptr, align 1
    597   %fp = sitofp i16 %raw to double
    598   %sq = fmul double %fp, %fp
    599   ret double %sq
    600 }
    602 
    602 define double @sfct15(i32* nocapture %sp0, i64 %offset) { ; signed i32 at a register index -> double; one sshll widening expected
    603 ; CHECK-LABEL: sfct15:
    604 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
    605 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
    606 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
    607 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    608 entry:
    609   %ptr = getelementptr i32* %sp0, i64 %offset
    610   %raw = load i32* %ptr, align 1
    611   %fp = sitofp i32 %raw to double
    612   %sq = fmul double %fp, %fp
    613   ret double %sq
    614 }
    616 
    616 define double @sfct16(i64* nocapture %sp0, i64 %offset) { ; signed i64 at a register index -> double; no widening, scvtf reads the register the load defined
    617 ; CHECK-LABEL: sfct16:
    618 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
    619 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
    620 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    621 entry:
    622   %addr = getelementptr i64* %sp0, i64 %offset
    623   %pix_sp0.0.copyload = load i64* %addr, align 1
    624   %val = sitofp i64 %pix_sp0.0.copyload to double
    625   %vmull.i = fmul double %val, %val  ; square the converted value
    626   ret double %vmull.i
    627 }
    629 
    630 ; ********* 5s. load with unscaled imm to float. *********
    630 define float @sfct17(i8* nocapture %sp0) { ; signed i8 at byte offset -1 -> float; two sshll widenings expected
    631 entry:
    632 ; CHECK-LABEL: sfct17:
    633 ; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
    634 ; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
    635 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
    636 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
    637 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    638   %base = ptrtoint i8* %sp0 to i64
    639   %moved = add i64 %base, -1
    640   %ptr = inttoptr i64 %moved to i8*
    641   %raw = load i8* %ptr, align 1
    642   %fp = sitofp i8 %raw to float
    643   %sq = fmul float %fp, %fp
    644   ret float %sq
    645 }
    647 
    647 define float @sfct18(i16* nocapture %sp0) { ; signed i16 at byte offset +1 -> float; one sshll widening expected
    648 ; CHECK-LABEL: sfct18:
    649 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
    650 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
    651 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
    652 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    653   %base = ptrtoint i16* %sp0 to i64
    654   %moved = add i64 %base, 1
    655   %ptr = inttoptr i64 %moved to i16*
    656   %raw = load i16* %ptr, align 1
    657   %fp = sitofp i16 %raw to float
    658   %sq = fmul float %fp, %fp
    659   ret float %sq
    660 }
    662 
    662 define float @sfct19(i32* nocapture %sp0) { ; signed i32 at byte offset +1 -> float; no widening, scvtf reads the register the load defined
    663 ; CHECK-LABEL: sfct19:
    664 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
    665 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
    666 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    667   %bitcast = ptrtoint i32* %sp0 to i64
    668   %add = add i64 %bitcast, 1  ; byte-granular offset, built with integer arithmetic on the pointer
    669   %addr = inttoptr i64 %add to i32*
    670   %pix_sp0.0.copyload = load i32* %addr, align 1
    671   %val = sitofp i32 %pix_sp0.0.copyload to float
    672   %vmull.i = fmul float %val, %val  ; square the converted value
    673   ret float %vmull.i
    674 }
    676 
    677 ; i64 -> f32 is not supported on floating point unit.
    677 define float @sfct20(i64* nocapture %sp0) { ; signed i64 at byte offset +1 -> float; load is expected in an x register
    678 ; CHECK-LABEL: sfct20:
    679 ; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
    680 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
    681 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    682   %base = ptrtoint i64* %sp0 to i64
    683   %moved = add i64 %base, 1
    684   %ptr = inttoptr i64 %moved to i64*
    685   %raw = load i64* %ptr, align 1
    686   %fp = sitofp i64 %raw to float
    687   %sq = fmul float %fp, %fp
    688   ret float %sq
    689 
    690 }
    692 
    693 ; ********* 6s. load with unscaled imm to double. *********
    693 define double @sfct21(i8* nocapture %sp0) { ; signed i8 at byte offset -1 -> double; sign-extending load into a w register expected
    694 entry:
    695 ; CHECK-LABEL: sfct21:
    696 ; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
    697 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
    698 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    699   %base = ptrtoint i8* %sp0 to i64
    700   %moved = add i64 %base, -1
    701   %ptr = inttoptr i64 %moved to i8*
    702   %raw = load i8* %ptr, align 1
    703   %fp = sitofp i8 %raw to double
    704   %sq = fmul double %fp, %fp
    705   ret double %sq
    706 }
    708 
    708 define double @sfct22(i16* nocapture %sp0) { ; signed i16 at byte offset +1 -> double; two sshll widenings expected
    709 ; CHECK-LABEL: sfct22:
    710 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
    711 ; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
    712 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
    713 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
    714 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    715   %base = ptrtoint i16* %sp0 to i64
    716   %moved = add i64 %base, 1
    717   %ptr = inttoptr i64 %moved to i16*
    718   %raw = load i16* %ptr, align 1
    719   %fp = sitofp i16 %raw to double
    720   %sq = fmul double %fp, %fp
    721   ret double %sq
    722 }
    724 
    724 define double @sfct23(i32* nocapture %sp0) { ; signed i32 at byte offset +1 -> double; one sshll widening expected
    725 ; CHECK-LABEL: sfct23:
    726 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
    727 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
    728 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
    729 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    730   %base = ptrtoint i32* %sp0 to i64
    731   %moved = add i64 %base, 1
    732   %ptr = inttoptr i64 %moved to i32*
    733   %raw = load i32* %ptr, align 1
    734   %fp = sitofp i32 %raw to double
    735   %sq = fmul double %fp, %fp
    736   ret double %sq
    737 }
    739 
    739 define double @sfct24(i64* nocapture %sp0) { ; signed i64 at byte offset +1 -> double; no widening, scvtf reads the register the load defined
    740 ; CHECK-LABEL: sfct24:
    741 ; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
    742 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
    743 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    744   %bitcast = ptrtoint i64* %sp0 to i64
    745   %add = add i64 %bitcast, 1  ; byte-granular offset, built with integer arithmetic on the pointer
    746   %addr = inttoptr i64 %add to i64*
    747   %pix_sp0.0.copyload = load i64* %addr, align 1
    748   %val = sitofp i64 %pix_sp0.0.copyload to double
    749   %vmull.i = fmul double %val, %val  ; square the converted value
    750   ret double %vmull.i
    751 
    752 }
    754 
    755 ; Check that we do not use SSHLL code sequence when code size is a concern.
    755 define float @codesize_sfct17(i8* nocapture %sp0) optsize { ; optsize variant of sfct17: sign-extending w-register load, no sshll
    756 entry:
    757 ; CHECK-LABEL: codesize_sfct17:
    758 ; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
    759 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
    760 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
    761   %base = ptrtoint i8* %sp0 to i64
    762   %moved = add i64 %base, -1
    763   %ptr = inttoptr i64 %moved to i8*
    764   %raw = load i8* %ptr, align 1
    765   %fp = sitofp i8 %raw to float
    766   %sq = fmul float %fp, %fp
    767   ret float %sq
    768 }
    770 
    770 define double @codesize_sfct11(i32* nocapture %sp0) minsize { ; minsize variant of sfct11: w-register load + scvtf, no sshll widening
    771 ; CHECK-LABEL: codesize_sfct11:
    772 ; CHECK: ldr w[[REGNUM:[0-9]+]], [x0, #4]
    773 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
    774 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
    775 entry:
    776   %addr = getelementptr i32* %sp0, i64 1
    777   %pix_sp0.0.copyload = load i32* %addr, align 1
    778   %val = sitofp i32 %pix_sp0.0.copyload to double
    779   %vmull.i = fmul double %val, %val  ; square the converted value
    780   ret double %vmull.i
    781 }
    783 
    784 ; Adding fp128 custom lowering makes these a little fragile since we have to
    785 ; return the correct mix of Legal/Expand from the custom method.
    786 ;
    787 ; rdar://problem/14991489
    788 
    788 define float @float_from_i128(i128 %in) { ; unsigned i128 -> float is expected to become a runtime-library call
    789 ; CHECK-LABEL: float_from_i128:
    790 ; CHECK: bl {{_?__floatuntisf}}
    791   %res = uitofp i128 %in to float
    792   ret float %res
    793 }
    795 
    795 define double @double_from_i128(i128 %in) { ; signed i128 -> double is expected to become a runtime-library call
    796 ; CHECK-LABEL: double_from_i128:
    797 ; CHECK: bl {{_?__floattidf}}
    798   %res = sitofp i128 %in to double
    799   ret double %res
    800 }
    802 
    802 define fp128 @fp128_from_i128(i128 %in) { ; unsigned i128 -> fp128 is expected to become a runtime-library call
    803 ; CHECK-LABEL: fp128_from_i128:
    804 ; CHECK: bl {{_?__floatuntitf}}
    805   %res = uitofp i128 %in to fp128
    806   ret fp128 %res
    807 }
    809 
    809 define i128 @i128_from_float(float %in) { ; float -> signed i128 is expected to become a runtime-library call
    810 ; CHECK-LABEL: i128_from_float:
    811 ; CHECK: bl {{_?__fixsfti}}
    812   %conv = fptosi float %in to i128
    813   ret i128 %conv
    814 }
    816 
    816 define i128 @i128_from_double(double %in) { ; double -> unsigned i128 is expected to become a runtime-library call
    817 ; CHECK-LABEL: i128_from_double:
    818 ; CHECK: bl {{_?__fixunsdfti}}
    819   %conv = fptoui double %in to i128
    820   ret i128 %conv
    821 }
    823 
    823 define i128 @i128_from_fp128(fp128 %in) { ; fp128 -> signed i128 is expected to become a runtime-library call
    824 ; CHECK-LABEL: i128_from_fp128:
    825 ; CHECK: bl {{_?__fixtfti}}
    826   %conv = fptosi fp128 %in to i128
    827   ret i128 %conv
    828 }
    830 
    831