; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s

; Every function below multiplies a floating-point vector by a constant vector
; and converts the product to an integer vector. The expectations separate the
; cases where the multiply is absorbed into a fixed-point fcvtzs/fcvtzu (the
; trailing "#N" operand) from the cases where the fmul has to remain.

; <2 x float> * 16.0 -> <2 x i32>; 16.0 = 2^4, so the expected scale is #4.
; CHECK-LABEL: test1:
; CHECK-NOT: fmul.2s
; CHECK: fcvtzs.2s v0, v0, #4
; CHECK: ret
define <2 x i32> @test1(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; <4 x float> * 8.0 -> <4 x i32>; 8.0 = 2^3, so the expected scale is #3.
; CHECK-LABEL: test2:
; CHECK-NOT: fmul.4s
; CHECK: fcvtzs.4s v0, v0, #3
; CHECK: ret
define <4 x i32> @test2(<4 x float> %f) {
  %mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
  %vcvt.i = fptosi <4 x float> %mul.i to <4 x i32>
  ret <4 x i32> %vcvt.i
}

; <2 x double> * 32.0 -> <2 x i64>; 32.0 = 2^5, so the expected scale is #5.
; CHECK-LABEL: test3:
; CHECK-NOT: fmul.2d
; CHECK: fcvtzs.2d v0, v0, #5
; CHECK: ret
define <2 x i64> @test3(<2 x double> %d) {
  %mul.i = fmul <2 x double> %d, <double 32.000000e+00, double 32.000000e+00>
  %vcvt.i = fptosi <2 x double> %mul.i to <2 x i64>
  ret <2 x i64> %vcvt.i
}

; Truncate double to i32
; The conversion is expected at .2d width with the 2^4 scale folded in as #4,
; followed by a narrowing xtn. The fmul pattern carries no operands because
; fmul never takes an immediate, so an operand-qualified negative pattern
; could not match anything.
; CHECK-LABEL: test4:
; CHECK-NOT: fmul.2d
; CHECK: fcvtzs.2d v0, v0, #4
; CHECK: xtn.2s
; CHECK: ret
define <2 x i32> @test4(<2 x double> %d) {
  %mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00>
  %vcvt.i = fptosi <2 x double> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Truncate float to i16
; CHECK-LABEL: test5:
; CHECK-NOT: fmul.2s
; CHECK: fcvtzs.2s v0, v0, #4
; CHECK: ret
define <2 x i16> @test5(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i16>
  ret <2 x i16> %vcvt.i
}

; Don't convert float to i64
; The multiply stays, the product is widened with fcvtl, and the conversion
; carries no "#N" scale.
; CHECK-LABEL: test6:
; CHECK: fmov.2s v1, #16.00000000
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtl v0.2d, v0.2s
; CHECK: fcvtzs.2d v0, v0
; CHECK: ret
define <2 x i64> @test6(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i64>
  ret <2 x i64> %vcvt.i
}

; Check unsigned conversion.
; Same shape as test1 but with fptoui, so fcvtzu is expected.
; CHECK-LABEL: test7:
; CHECK-NOT: fmul.2s
; CHECK: fcvtzu.2s v0, v0, #4
; CHECK: ret
define <2 x i32> @test7(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Test which should not fold due to non-power of 2.
; CHECK-LABEL: test8:
; CHECK: fmov.2s v1, #17.00000000
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzu.2s v0, v0
; CHECK: ret
define <2 x i32> @test8(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 17.000000e+00, float 17.000000e+00>
  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Test which should not fold due to non-matching power of 2.
; The two lanes are scaled by 16.0 and 8.0 respectively.
; CHECK-LABEL: test9:
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzu.2s v0, v0
; CHECK: ret
define <2 x i32> @test9(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 8.000000e+00>
  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Don't combine all undefs.
; Register numbers are matched with regexes, so no particular allocation is
; required here.
; CHECK-LABEL: test10:
; CHECK: fmul.2s v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CHECK: fcvtzu.2s v{{[0-9]+}}, v{{[0-9]+}}
; CHECK: ret
define <2 x i32> @test10(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float undef, float undef>
  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Combine if mix of undef and pow2.
; Lane 0 of the multiplier is undef and lane 1 is 8.0 (2^3), so the expected
; scale is #3 and no fmul should be left behind.
; CHECK-LABEL: test11:
; CHECK-NOT: fmul.2s
; CHECK: fcvtzu.2s v0, v0, #3
; CHECK: ret
define <2 x i32> @test11(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float undef, float 8.000000e+00>
  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Don't combine when multiplied by 0.0.
; CHECK-LABEL: test12:
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzs.2s v0, v0
; CHECK: ret
define <2 x i32> @test12(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 0.000000e+00, float 0.000000e+00>
  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Test which should not fold due to power of 2 out of range (i.e., 2^33).
; 0x4200000000000000 is the hexadecimal spelling of 2^33.
; CHECK-LABEL: test13:
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzs.2s v0, v0
; CHECK: ret
define <2 x i32> @test13(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 0x4200000000000000, float 0x4200000000000000>
  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Test case where const is max power of 2 (i.e., 2^32).
; 0x41F0000000000000 is the hexadecimal spelling of 2^32; the expected scale
; is #32 with the multiply gone.
; CHECK-LABEL: test14:
; CHECK-NOT: fmul.2s
; CHECK: fcvtzs.2s v0, v0, #32
; CHECK: ret
define <2 x i32> @test14(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000>
  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; <3 x float> * 4.0 -> <3 x i32>. The expected conversion is at .4s width with
; the 2^2 scale folded in as #2.
; CHECK-LABEL: test_illegal_fp_to_int:
; CHECK-NOT: fmul.4s
; CHECK: fcvtzs.4s v0, v0, #2
; CHECK: ret
define <3 x i32> @test_illegal_fp_to_int(<3 x float> %in) {
  %scale = fmul <3 x float> %in, <float 4.0, float 4.0, float 4.0>
  %val = fptosi <3 x float> %scale to <3 x i32>
  ret <3 x i32> %val
}