Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
      2 
      3 ; CHECK-LABEL: test1:
      4 ; CHECK-NOT: fmul.2s
      5 ; CHECK: fcvtzs.2s v0, v0, #4
      6 ; CHECK: ret
      7 define <2 x i32> @test1(<2 x float> %f) { ; fmul by a 2^4 splat feeding fptosi: a single fixed-point fcvtzs is expected
      8   %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00> ; 16.0 = 2^4, hence the #4 above
      9   %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> ; signed convert, so fcvtzs rather than fcvtzu
     10   ret <2 x i32> %vcvt.i
     11 }
     12 
     13 ; CHECK-LABEL: test2
     14 ; CHECK-NOT: fmul.4s
     15 ; CHECK: fcvtzs.4s v0, v0, #3
     16 ; CHECK: ret
     17 define <4 x i32> @test2(<4 x float> %f) { ; four-lane case: scale by a 2^3 splat, then signed convert
     18   %scaled = fmul <4 x float> %f, <float 8.0, float 8.0, float 8.0, float 8.0>
     19   %fixed = fptosi <4 x float> %scaled to <4 x i32>
     20   ret <4 x i32> %fixed
     21 }
     22 
     23 ; CHECK-LABEL: test3
     24 ; CHECK-NOT: fmul.2d
     25 ; CHECK: fcvtzs.2d v0, v0, #5
     26 ; CHECK: ret
     27 define <2 x i64> @test3(<2 x double> %d) { ; double-precision case: scale by a 2^5 splat, convert to i64 lanes
     28   %scaled = fmul <2 x double> %d, <double 3.200000e+01, double 3.200000e+01>
     29   %fixed = fptosi <2 x double> %scaled to <2 x i64>
     30   ret <2 x i64> %fixed
     31 }
     32 
     33 ; Truncate double to i32: the multiply still folds into fcvtzs, and xtn narrows the result.
     34 ; CHECK-LABEL: test4:
     35 ; CHECK-NOT: fmul.2d
     36 ; CHECK: fcvtzs.2d v0, v0, #4
     37 ; CHECK: xtn.2s
     38 ; CHECK: ret
     39 define <2 x i32> @test4(<2 x double> %d) {
     40   %mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00> ; 16.0 = 2^4, hence the #4 above
     41   %vcvt.i = fptosi <2 x double> %mul.i to <2 x i32> ; result lanes are narrower than the source lanes
     42   ret <2 x i32> %vcvt.i
     43 }
     44 
     45 ; float -> i16 result lanes: the fold to a fixed-point fcvtzs.2s is still expected.
     46 ; CHECK-LABEL: test5
     47 ; CHECK-NOT: fmul.2s
     48 ; CHECK: fcvtzs.2s v0, v0, #4
     49 ; CHECK: ret
     50 define <2 x i16> @test5(<2 x float> %f) {
     51   %scaled = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
     52   %fixed = fptosi <2 x float> %scaled to <2 x i16>
     53   ret <2 x i16> %fixed
     54 }
     55 
     56 ; float -> i64 must not be folded: the fmul stays, followed by fcvtl and a plain fcvtzs.
     57 ; CHECK-LABEL: test6
     58 ; CHECK: fmov.2s v1, #16.00000000
     59 ; CHECK: fmul.2s v0, v0, v1
     60 ; CHECK: fcvtl v0.2d, v0.2s
     61 ; CHECK: fcvtzs.2d v0, v0
     62 ; CHECK: ret
     63 define <2 x i64> @test6(<2 x float> %f) {
     64   %scaled = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
     65   %wide = fptosi <2 x float> %scaled to <2 x i64>
     66   ret <2 x i64> %wide
     67 }
     68 
     69 ; Unsigned variant: fptoui should select the fixed-point fcvtzu form.
     70 ; CHECK-LABEL: test7
     71 ; CHECK-NOT: fmul.2s
     72 ; CHECK: fcvtzu.2s v0, v0, #4
     73 ; CHECK: ret
     74 define <2 x i32> @test7(<2 x float> %f) {
     75   %scaled = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
     76   %fixed = fptoui <2 x float> %scaled to <2 x i32>
     77   ret <2 x i32> %fixed
     78 }
     79 
     80 ; Negative test: 17.0 is not a power of 2, so the fmul must be kept.
     81 ; CHECK-LABEL: test8
     82 ; CHECK: fmov.2s v1, #17.00000000
     83 ; CHECK: fmul.2s v0, v0, v1
     84 ; CHECK: fcvtzu.2s v0, v0
     85 ; CHECK: ret
     86 define <2 x i32> @test8(<2 x float> %f) {
     87   %scaled = fmul <2 x float> %f, <float 1.700000e+01, float 1.700000e+01>
     88   %conv = fptoui <2 x float> %scaled to <2 x i32>
     89   ret <2 x i32> %conv
     90 }
     91 
     92 ; Negative test: the lanes hold different powers of 2 (16.0 and 8.0), so the fmul must be kept.
     93 ; CHECK-LABEL: test9
     94 ; CHECK: fmul.2s v0, v0, v1
     95 ; CHECK: fcvtzu.2s v0, v0
     96 ; CHECK: ret
     97 define <2 x i32> @test9(<2 x float> %f) {
     98   %scaled = fmul <2 x float> %f, <float 1.600000e+01, float 8.0>
     99   %conv = fptoui <2 x float> %scaled to <2 x i32>
    100   ret <2 x i32> %conv
    101 }
    102 
    103 ; All-undef multiplier: only an fcvtzu.2s (on any registers) is required in the output.
    104 ; CHECK-LABEL: test10
    105 ; CHECK: fcvtzu.2s v{{[0-9]+}}, v{{[0-9]+}}
    106 ; CHECK: ret
    107 define <2 x i32> @test10(<2 x float> %f) {
    108   %scaled = fmul <2 x float> %f, <float undef, float undef>
    109   %conv = fptoui <2 x float> %scaled to <2 x i32>
    110   ret <2 x i32> %conv
    111 }
    112 
    113 ; One undef lane plus one 2^3 lane: the fold is still expected, using #3.
    114 ; CHECK-LABEL: test11
    115 ; CHECK: fcvtzu.2s v0, v0, #3
    116 ; CHECK: ret
    117 define <2 x i32> @test11(<2 x float> %f) {
    118   %scaled = fmul <2 x float> %f, <float undef, float 8.0>
    119   %fixed = fptoui <2 x float> %scaled to <2 x i32>
    120   ret <2 x i32> %fixed
    121 }
    122 
    123 ; A 0.0 splat must not be folded: both the fmul and a plain fcvtzs are expected.
    124 ; CHECK-LABEL: test12
    125 ; CHECK: fmul.2s v0, v0, v1
    126 ; CHECK: fcvtzs.2s v0, v0
    127 ; CHECK: ret
    128 define <2 x i32> @test12(<2 x float> %f) {
    129   %scaled = fmul <2 x float> %f, <float 0.0, float 0.0>
    130   %conv = fptosi <2 x float> %scaled to <2 x i32>
    131   ret <2 x i32> %conv
    132 }
    133 
    134 ; Negative test: the splat is 2^33, a power of 2 that is out of range, so no fold.
    135 ; CHECK-LABEL: test13
    136 ; CHECK: fmul.2s v0, v0, v1
    137 ; CHECK: fcvtzs.2s v0, v0
    138 ; CHECK: ret
    139 define <2 x i32> @test13(<2 x float> %f) {
    140   %scaled = fmul <2 x float> %f, <float 0x4200000000000000, float 0x4200000000000000> ; hex form of 2^33
    141   %conv = fptosi <2 x float> %scaled to <2 x i32>
    142   ret <2 x i32> %conv
    143 }
    144 
    145 ; Boundary case: the splat is 2^32, the largest power of 2 that still folds (#32).
    146 ; CHECK-LABEL: test14
    147 ; CHECK: fcvtzs.2s v0, v0, #32
    148 ; CHECK: ret
    149 define <2 x i32> @test14(<2 x float> %f) {
    150   %scaled = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000> ; hex form of 2^32
    151   %fixed = fptosi <2 x float> %scaled to <2 x i32>
    152   ret <2 x i32> %fixed
    153 }
    154 
    155 ; CHECK-LABEL: test_illegal_fp_to_int:
    156 ; CHECK: fcvtzs.4s v0, v0, #2
    157 define <3 x i32> @test_illegal_fp_to_int(<3 x float> %in) { ; 3-lane input; the expected convert is on a .4s register
    158   %scaled = fmul <3 x float> %in, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
    159   %fixed = fptosi <3 x float> %scaled to <3 x i32>
    160   ret <3 x i32> %fixed
    161 }
    162