; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \
; RUN: < %s -verify-machineinstrs -asm-verbose=false | FileCheck %s

; Each function below selects between two vector values based on a scalar
; compare. The checks expect the compare to be carried out in a vector
; register (cmeq / fcmeq), lane 0 of the resulting mask to be broadcast with
; dup where the vector has more than one lane, and the final choice to be made
; with bsl.

;--- <8 x i8> results ---------------------------------------------------------

; i8 compare: both scalars are first moved from w-registers into vector
; registers.
define <8 x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_select_cc_v8i8_i8:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].8b, v[[LHS]].8b, v[[RHS]].8b
; CHECK: dup [[DUPMASK:v[0-9]+]].8b, [[MASK]].b[0]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
  %cond = icmp eq i8 %a, %b
  %sel = select i1 %cond, <8 x i8> %c, <8 x i8> %d
  ret <8 x i8> %sel
}

; float compare: the operands are already in v0/v1, the vectors in v2/v3.
define <8 x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_select_cc_v8i8_f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK-NEXT: bsl [[DUPMASK]].8b, v2.8b, v3.8b
  %cond = fcmp oeq float %a, %b
  %sel = select i1 %cond, <8 x i8> %c, <8 x i8> %d
  ret <8 x i8> %sel
}

; double compare: the scalar fcmeq result is used as the bsl mask directly,
; with no dup in between.
define <8 x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_select_cc_v8i8_f64:
; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
; CHECK-NEXT: bsl v[[MASK]].8b, v2.8b, v3.8b
  %cond = fcmp oeq double %a, %b
  %sel = select i1 %cond, <8 x i8> %c, <8 x i8> %d
  ret <8 x i8> %sel
}

;--- <16 x i8> results --------------------------------------------------------

; i8 compare feeding a 128-bit select.
define <16 x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: test_select_cc_v16i8_i8:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].16b, v[[LHS]].16b, v[[RHS]].16b
; CHECK: dup [[DUPMASK:v[0-9]+]].16b, [[MASK]].b[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %cond = icmp eq i8 %a, %b
  %sel = select i1 %cond, <16 x i8> %c, <16 x i8> %d
  ret <16 x i8> %sel
}

; float compare feeding a 128-bit select.
define <16 x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: test_select_cc_v16i8_f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %cond = fcmp oeq float %a, %b
  %sel = select i1 %cond, <16 x i8> %c, <16 x i8> %d
  ret <16 x i8> %sel
}

; double compare feeding a 128-bit select.
define <16 x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: test_select_cc_v16i8_f64:
; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %cond = fcmp oeq double %a, %b
  %sel = select i1 %cond, <16 x i8> %c, <16 x i8> %d
  ret <16 x i8> %sel
}

;--- i16 / i32 / i64 element results, integer compare of matching width --------

define <4 x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: test_select_cc_v4i16:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].4h, v[[LHS]].4h, v[[RHS]].4h
; CHECK: dup [[DUPMASK:v[0-9]+]].4h, [[MASK]].h[0]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
  %cond = icmp eq i16 %a, %b
  %sel = select i1 %cond, <4 x i16> %c, <4 x i16> %d
  ret <4 x i16> %sel
}

define <8 x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8 x i16> %c, <8 x i16> %d) {
; CHECK-LABEL: test_select_cc_v8i16:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].8h, v[[LHS]].8h, v[[RHS]].8h
; CHECK: dup [[DUPMASK:v[0-9]+]].8h, [[MASK]].h[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %cond = icmp eq i16 %a, %b
  %sel = select i1 %cond, <8 x i16> %c, <8 x i16> %d
  ret <8 x i16> %sel
}

define <2 x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2 x i32> %c, <2 x i32> %d) {
; CHECK-LABEL: test_select_cc_v2i32:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].2s, v[[LHS]].2s, v[[RHS]].2s
; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
  %cond = icmp eq i32 %a, %b
  %sel = select i1 %cond, <2 x i32> %c, <2 x i32> %d
  ret <2 x i32> %sel
}

define <4 x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: test_select_cc_v4i32:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %cond = icmp eq i32 %a, %b
  %sel = select i1 %cond, <4 x i32> %c, <4 x i32> %d
  ret <4 x i32> %sel
}

; Single-lane i64 vector: the scalar cmeq result is the bsl mask, no dup.
define <1 x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1 x i64> %c, <1 x i64> %d) {
; CHECK-LABEL: test_select_cc_v1i64:
; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
; CHECK: cmeq d[[MASK:[0-9]+]], d[[LHS]], d[[RHS]]
; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
  %cond = icmp eq i64 %a, %b
  %sel = select i1 %cond, <1 x i64> %c, <1 x i64> %d
  ret <1 x i64> %sel
}

define <2 x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2 x i64> %c, <2 x i64> %d) {
; CHECK-LABEL: test_select_cc_v2i64:
; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
; CHECK: cmeq [[MASK:v[0-9]+]].2d, v[[LHS]].2d, v[[RHS]].2d
; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %cond = icmp eq i64 %a, %b
  %sel = select i1 %cond, <2 x i64> %c, <2 x i64> %d
  ret <2 x i64> %sel
}

;--- floating-point element results -------------------------------------------

; Single-lane float vector: the fcmeq mask goes straight into bsl, no dup.
define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d) {
; CHECK-LABEL: test_select_cc_v1f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b
  %cond = fcmp oeq float %a, %b
  %sel = select i1 %cond, <1 x float> %c, <1 x float> %d
  ret <1 x float> %sel
}

define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d) {
; CHECK-LABEL: test_select_cc_v2f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].8b, v2.8b, v3.8b
  %cond = fcmp oeq float %a, %b
  %sel = select i1 %cond, <2 x float> %c, <2 x float> %d
  ret <2 x float> %sel
}

define <4 x float> @test_select_cc_v4f32(float %a, float %b, <4 x float> %c, <4 x float> %d) {
; CHECK-LABEL: test_select_cc_v4f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %cond = fcmp oeq float %a, %b
  %sel = select i1 %cond, <4 x float> %c, <4 x float> %d
  ret <4 x float> %sel
}

; Float vector selected on an integer compare: the compare is a cmeq on the
; scalars moved in from w0/w1.
define <4 x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4 x float> %c, <4 x float> %d) {
; CHECK-LABEL: test_select_cc_v4f32_icmp:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %cond = icmp eq i32 %a, %b
  %sel = select i1 %cond, <4 x float> %c, <4 x float> %d
  ret <4 x float> %sel
}

; Single-lane double vector: scalar fcmeq result is the bsl mask, no dup.
define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d) {
; CHECK-LABEL: test_select_cc_v1f64:
; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
; CHECK: bsl v[[MASK]].8b, v2.8b, v3.8b
  %cond = fcmp oeq double %a, %b
  %sel = select i1 %cond, <1 x double> %c, <1 x double> %d
  ret <1 x double> %sel
}

; Single-lane double vector selected on an i64 compare.
define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d) {
; CHECK-LABEL: test_select_cc_v1f64_icmp:
; CHECK-DAG: fmov [[LHS:d[0-9]+]], x0
; CHECK-DAG: fmov [[RHS:d[0-9]+]], x1
; CHECK: cmeq d[[MASK:[0-9]+]], [[LHS]], [[RHS]]
; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
  %cond = icmp eq i64 %a, %b
  %sel = select i1 %cond, <1 x double> %c, <1 x double> %d
  ret <1 x double> %sel
}

define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d) {
; CHECK-LABEL: test_select_cc_v2f64:
; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %cond = fcmp oeq double %a, %b
  %sel = select i1 %cond, <2 x double> %c, <2 x double> %d
  ret <2 x double> %sel
}

; Special case: when the select condition is an icmp with i1 operands, the
; comparison must not be done on vectors. The checks expect a scalar
; tst/csetm whose result is then broadcast as the mask.
; Part of PR21549.
define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_select_cc_v2i32_icmpi1:
; CHECK: tst w0, #0x1
; CHECK: csetm [[MASK:w[0-9]+]], ne
; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
; CHECK: mov v0.16b, [[DUPMASK]].16b
  %cond = icmp ne i1 %cc, 0
  %sel = select i1 %cond, <2 x i32> %a, <2 x i32> %b
  ret <2 x i32> %sel
}

; Also make sure we support irregular/non-power-of-2 types such as v3f32.
; <3 x float> selected on a float equality compare. The vectors arrive in
; v0/v1 and the compare operands in v2/v3. The expected sequence is checked
; line by line: fcmeq, dup of mask lane 0, bsl, a mov of the result into v0,
; then ret.
; The bsl line uses the DUPMASK capture instead of re-capturing it, so the
; test verifies that bsl operates on the register written by dup.
; NOTE(review): attribute #0 (nounwind) is presumably what keeps extra
; directives from appearing between the label and the first instruction -
; confirm before removing it.
define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:
; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].4s, v2.4s, v3.4s
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v0.16b, v1.16b
; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
; CHECK-NEXT: ret
  %cc = fcmp oeq float %c1, %c2
  %r = select i1 %cc, <3 x float> %a, <3 x float> %b
  ret <3 x float> %r
}

; Same as above with a double equality compare: the mask is produced and
; broadcast at 64-bit lane width (.2d) before the 128-bit bsl.
; As above, the bsl line uses the DUPMASK capture so that it is tied to the
; dup result.
define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:
; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].2d, v2.2d, v3.2d
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v0.16b, v1.16b
; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
; CHECK-NEXT: ret
  %cc = fcmp oeq double %c1, %c2
  %r = select i1 %cc, <3 x float> %a, <3 x float> %b
  ret <3 x float> %r
}

attributes #0 = { nounwind }