Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
      2 
      3 ; CHECK-LABEL: test1:
      4 ; CHECK-NOT: fmul.2s
      5 ; CHECK: fcvtzs.2s v0, v0, #4
      6 ; CHECK: ret
      7 define <2 x i32> @test1(<2 x float> %f) { ; fmul by splat 2^4 + fptosi should become one fixed-point convert
      8   %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00> ; 16.0 = 2^4 in both lanes
      9   %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> ; signed convert; the scale is expected as the #4 immediate
     10   ret <2 x i32> %vcvt.i
     11 }
     12 
     13 ; CHECK-LABEL: test2:
     14 ; CHECK-NOT: fmul.4s
     15 ; CHECK: fcvtzs.4s v0, v0, #3
     16 ; CHECK: ret
     17 define <4 x i32> @test2(<4 x float> %f) { ; same fold as the 2-lane case, on a 128-bit <4 x float> vector
     18   %mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00> ; 8.0 = 2^3 in every lane
     19   %vcvt.i = fptosi <4 x float> %mul.i to <4 x i32> ; signed convert; the scale is expected as the #3 immediate
     20   ret <4 x i32> %vcvt.i
     21 }
     22 
     23 ; CHECK-LABEL: test3:
     24 ; CHECK-NOT: fmul.2d
     25 ; CHECK: fcvtzs.2d v0, v0, #5
     26 ; CHECK: ret
     27 define <2 x i64> @test3(<2 x double> %d) { ; double -> i64 variant of the fmul/fptosi fold
     28   %mul.i = fmul <2 x double> %d, <double 32.000000e+00, double 32.000000e+00> ; 32.0 = 2^5 in both lanes
     29   %vcvt.i = fptosi <2 x double> %mul.i to <2 x i64> ; signed convert; the scale is expected as the #5 immediate
     30   ret <2 x i64> %vcvt.i
     31 }
     32 
     33 ; Truncate double to i32: the multiply by 2^4 folds into the 64-bit convert, which is then narrowed.
     34 ; CHECK-LABEL: test4:
     35 ; CHECK-NOT: fmul.2d
     36 ; CHECK: fcvtzs.2d v0, v0, #4
     37 ; CHECK: xtn.2s
     38 ; CHECK: ret
     39 define <2 x i32> @test4(<2 x double> %d) {
     40   %mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00> ; 16.0 = 2^4 in both lanes
     41   %vcvt.i = fptosi <2 x double> %mul.i to <2 x i32> ; result lanes are narrower than the source lanes, hence the xtn
     42   ret <2 x i32> %vcvt.i
     43 }
     44 
     45 ; Truncate float to i16: the fold is still expected when the integer lanes are narrower than the float lanes.
     46 ; CHECK-LABEL: test5:
     47 ; CHECK-NOT: fmul.2s
     48 ; CHECK: fcvtzs.2s v0, v0, #4
     49 ; CHECK: ret
     50 define <2 x i16> @test5(<2 x float> %f) {
     51   %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00> ; 16.0 = 2^4 in both lanes
     52   %vcvt.i = fptosi <2 x float> %mul.i to <2 x i16> ; signed convert to 16-bit lanes
     53   ret <2 x i16> %vcvt.i
     54 }
     55 
     56 ; Don't convert float to i64: the multiply must survive, followed by a widen and a plain convert.
     57 ; CHECK-LABEL: test6:
     58 ; CHECK: fmov.2s v1, #16.00000000
     59 ; CHECK: fmul.2s v0, v0, v1
     60 ; CHECK: fcvtl v0.2d, v0.2s
     61 ; CHECK: fcvtzs.2d v0, v0
     62 ; CHECK: ret
     63 define <2 x i64> @test6(<2 x float> %f) {
     64   %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00> ; 16.0 = 2^4, but the fold is not wanted here
     65   %vcvt.i = fptosi <2 x float> %mul.i to <2 x i64> ; integer lanes (64-bit) are wider than the float lanes (32-bit)
     66   ret <2 x i64> %vcvt.i
     67 }
     68 
     69 ; Check unsigned conversion: fptoui should fold the same way, selecting fcvtzu.
     70 ; CHECK-LABEL: test7:
     71 ; CHECK-NOT: fmul.2s
     72 ; CHECK: fcvtzu.2s v0, v0, #4
     73 ; CHECK: ret
     74 define <2 x i32> @test7(<2 x float> %f) {
     75   %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00> ; 16.0 = 2^4 in both lanes
     76   %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32> ; unsigned counterpart of the signed fold
     77   ret <2 x i32> %vcvt.i
     78 }
     79 
     80 ; Negative test: must not fold because the multiplier (17.0) is not a power of 2.
     81 ; CHECK-LABEL: test8:
     82 ; CHECK: fmov.2s v1, #17.00000000
     83 ; CHECK: fmul.2s v0, v0, v1
     84 ; CHECK: fcvtzu.2s v0, v0
     85 ; CHECK: ret
     86 define <2 x i32> @test8(<2 x float> %f) {
     87   %mul.i = fmul <2 x float> %f, <float 17.000000e+00, float 17.000000e+00> ; 17.0 cannot be expressed as 2^n
     88   %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32> ; expected to stay a separate convert after the fmul
     89   ret <2 x i32> %vcvt.i
     90 }
     91 
     92 ; Negative test: must not fold because the lanes hold different powers of 2.
     93 ; CHECK-LABEL: test9:
     94 ; CHECK: fmul.2s v0, v0, v1
     95 ; CHECK: fcvtzu.2s v0, v0
     96 ; CHECK: ret
     97 define <2 x i32> @test9(<2 x float> %f) {
     98   %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 8.000000e+00> ; 2^4 in lane 0 vs 2^3 in lane 1: not a splat
     99   %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32> ; expected to stay a separate convert after the fmul
    100   ret <2 x i32> %vcvt.i
    101 }
    102 
    103 ; Don't combine when every lane of the multiplier is undef.
    104 ; CHECK-LABEL: test10:
    105 ; CHECK: fmul.2s v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    106 ; CHECK: fcvtzu.2s v{{[0-9]+}}, v{{[0-9]+}}
    107 ; CHECK: ret
    108 define <2 x i32> @test10(<2 x float> %f) {
    109   %mul.i = fmul <2 x float> %f, <float undef, float undef> ; no defined lane to derive a shift amount from
    110   %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32> ; expected to stay a separate convert after the fmul
    111   ret <2 x i32> %vcvt.i
    112 }
    113 
    114 ; Combine when the multiplier mixes undef lanes with a power of 2.
    115 ; CHECK-LABEL: test11:
    116 ; CHECK: fcvtzu.2s v0, v0, #3
    117 ; CHECK: ret
    118 define <2 x i32> @test11(<2 x float> %f) {
    119   %mul.i = fmul <2 x float> %f, <float undef, float 8.000000e+00> ; the defined lane is 8.0 = 2^3
    120   %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32> ; unsigned convert; the scale is expected as the #3 immediate
    121   ret <2 x i32> %vcvt.i
    122 }
    123 
    124 ; Don't combine when the multiplier is 0.0 (not a power of 2).
    125 ; CHECK-LABEL: test12:
    126 ; CHECK: fmul.2s v0, v0, v1
    127 ; CHECK: fcvtzs.2s v0, v0
    128 ; CHECK: ret
    129 define <2 x i32> @test12(<2 x float> %f) {
    130   %mul.i = fmul <2 x float> %f, <float 0.000000e+00, float 0.000000e+00> ; zero splat
    131   %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> ; expected to stay a separate convert after the fmul
    132   ret <2 x i32> %vcvt.i
    133 }
    134 
    135 ; Negative test: must not fold because the power of 2 is out of range (i.e., 2^33).
    136 ; CHECK-LABEL: test13:
    137 ; CHECK: fmul.2s v0, v0, v1
    138 ; CHECK: fcvtzs.2s v0, v0
    139 ; CHECK: ret
    140 define <2 x i32> @test13(<2 x float> %f) {
    141   %mul.i = fmul <2 x float> %f, <float 0x4200000000000000, float 0x4200000000000000> ; hex float literal for 2^33
    142   %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> ; expected to stay a separate convert after the fmul
    143   ret <2 x i32> %vcvt.i
    144 }
    145 
    146 ; Boundary test: the multiplier is the largest power of 2 that still folds (i.e., 2^32).
    147 ; CHECK-LABEL: test14:
    148 ; CHECK: fcvtzs.2s v0, v0, #32
    149 ; CHECK: ret
    150 define <2 x i32> @test14(<2 x float> %f) {
    151   %mul.i = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000> ; hex float literal for 2^32
    152   %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> ; signed convert; the scale is expected as the #32 immediate
    153   ret <2 x i32> %vcvt.i
    154 }
    155 
    156 ; CHECK-LABEL: test_illegal_fp_to_int:
    157 ; CHECK: fcvtzs.4s v0, v0, #2
    158 define <3 x i32> @test_illegal_fp_to_int(<3 x float> %in) { ; 3-lane vectors; the check expects the fold on a .4s convert
    159   %scaled = fmul <3 x float> %in, <float 4.0, float 4.0, float 4.0> ; 4.0 = 2^2 in every lane
    160   %fixed = fptosi <3 x float> %scaled to <3 x i32> ; signed convert; the scale is expected as the #2 immediate
    161   ret <3 x i32> %fixed
    162 }
    163