Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc  -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \
      2 ; RUN:        < %s -verify-machineinstrs -asm-verbose=false | FileCheck %s
      3 
      4 define <8 x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8 x i8> %c, <8 x i8> %d) {
        ; An i8 equality chooses between two <8 x i8> operands. The expectations
        ; below want both scalars moved into vector registers, compared with cmeq,
        ; lane 0 of the mask splatted by dup, and the splat consumed by bsl.
      5 ; CHECK-LABEL: test_select_cc_v8i8_i8:
      6 ; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
      7 ; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
      8 ; CHECK: cmeq [[MASK:v[0-9]+]].8b, v[[LHS]].8b, v[[RHS]].8b
      9 ; CHECK: dup [[DUPMASK:v[0-9]+]].8b, [[MASK]].b[0]
     10 ; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
     11   %cond = icmp eq i8 %a, %b
     12   %sel = select i1 %cond, <8 x i8> %c, <8 x i8> %d
     13   ret <8 x i8> %sel
     14 }
     15 
     16 define <8 x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8 x i8> %c, <8 x i8> %d) {
        ; A float ordered-equal compare chooses between two <8 x i8> operands.
        ; Expected sequence: fcmeq on .2s, dup of lane 0, then bsl, back to back.
     17 ; CHECK-LABEL: test_select_cc_v8i8_f32:
     18 ; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
     19 ; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
     20 ; CHECK-NEXT: bsl [[DUPMASK]].8b, v2.8b, v3.8b
     21   %cond = fcmp oeq float %a, %b
     22   %sel = select i1 %cond, <8 x i8> %c, <8 x i8> %d
     23   ret <8 x i8> %sel
     24 }
     25 
     26 define <8 x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8 x i8> %c, <8 x i8> %d) {
        ; A double ordered-equal compare chooses between two <8 x i8> operands.
        ; The scalar fcmeq result on a d register is expected to feed bsl directly.
     27 ; CHECK-LABEL: test_select_cc_v8i8_f64:
     28 ; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
     29 ; CHECK-NEXT: bsl v[[MASK]].8b, v2.8b, v3.8b
     30   %cond = fcmp oeq double %a, %b
     31   %sel = select i1 %cond, <8 x i8> %c, <8 x i8> %d
     32   ret <8 x i8> %sel
     33 }
     34 
     35 define <16 x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16 x i8> %c, <16 x i8> %d) {
        ; 128-bit variant of the i8 case: an i8 equality chooses between two
        ; <16 x i8> operands via cmeq, dup of lane 0 and bsl on .16b.
     36 ; CHECK-LABEL: test_select_cc_v16i8_i8:
     37 ; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
     38 ; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
     39 ; CHECK: cmeq [[MASK:v[0-9]+]].16b, v[[LHS]].16b, v[[RHS]].16b
     40 ; CHECK: dup [[DUPMASK:v[0-9]+]].16b, [[MASK]].b[0]
     41 ; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
     42   %cond = icmp eq i8 %a, %b
     43   %sel = select i1 %cond, <16 x i8> %c, <16 x i8> %d
     44   ret <16 x i8> %sel
     45 }
     46 
     47 define <16 x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16 x i8> %c, <16 x i8> %d) {
        ; A float ordered-equal compare chooses between two <16 x i8> operands.
        ; Expected sequence: fcmeq on .4s, dup of lane 0, then bsl on .16b.
     48 ; CHECK-LABEL: test_select_cc_v16i8_f32:
     49 ; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
     50 ; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
     51 ; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
     52   %cond = fcmp oeq float %a, %b
     53   %sel = select i1 %cond, <16 x i8> %c, <16 x i8> %d
     54   ret <16 x i8> %sel
     55 }
     56 
     57 define <16 x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16 x i8> %c, <16 x i8> %d) {
        ; A double ordered-equal compare chooses between two <16 x i8> operands.
        ; Expected sequence: fcmeq on .2d, dup of lane 0, then bsl on .16b.
     58 ; CHECK-LABEL: test_select_cc_v16i8_f64:
     59 ; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
     60 ; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
     61 ; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
     62   %cond = fcmp oeq double %a, %b
     63   %sel = select i1 %cond, <16 x i8> %c, <16 x i8> %d
     64   ret <16 x i8> %sel
     65 }
     66 
     67 define <4 x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4 x i16> %c, <4 x i16> %d) {
        ; An i16 equality chooses between two <4 x i16> operands. The compare and
        ; splat are expected on .4h lanes, with bsl operating on .8b.
     68 ; CHECK-LABEL: test_select_cc_v4i16:
     69 ; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
     70 ; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
     71 ; CHECK: cmeq [[MASK:v[0-9]+]].4h, v[[LHS]].4h, v[[RHS]].4h
     72 ; CHECK: dup [[DUPMASK:v[0-9]+]].4h, [[MASK]].h[0]
     73 ; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
     74   %cond = icmp eq i16 %a, %b
     75   %sel = select i1 %cond, <4 x i16> %c, <4 x i16> %d
     76   ret <4 x i16> %sel
     77 }
     78 
     79 define <8 x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8 x i16> %c, <8 x i16> %d) {
        ; An i16 equality chooses between two <8 x i16> operands. The compare and
        ; splat are expected on .8h lanes, with bsl operating on .16b.
     80 ; CHECK-LABEL: test_select_cc_v8i16:
     81 ; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
     82 ; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
     83 ; CHECK: cmeq [[MASK:v[0-9]+]].8h, v[[LHS]].8h, v[[RHS]].8h
     84 ; CHECK: dup [[DUPMASK:v[0-9]+]].8h, [[MASK]].h[0]
     85 ; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
     86   %cond = icmp eq i16 %a, %b
     87   %sel = select i1 %cond, <8 x i16> %c, <8 x i16> %d
     88   ret <8 x i16> %sel
     89 }
     90 
     91 define <2 x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2 x i32> %c, <2 x i32> %d) {
        ; An i32 equality chooses between two <2 x i32> operands. The compare and
        ; splat are expected on .2s lanes, with bsl operating on .8b.
     92 ; CHECK-LABEL: test_select_cc_v2i32:
     93 ; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
     94 ; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
     95 ; CHECK: cmeq [[MASK:v[0-9]+]].2s, v[[LHS]].2s, v[[RHS]].2s
     96 ; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
     97 ; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
     98   %cond = icmp eq i32 %a, %b
     99   %sel = select i1 %cond, <2 x i32> %c, <2 x i32> %d
    100   ret <2 x i32> %sel
    101 }
    102 
    103 define <4 x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4 x i32> %c, <4 x i32> %d) {
        ; An i32 equality chooses between two <4 x i32> operands. The compare and
        ; splat are expected on .4s lanes, with bsl operating on .16b.
    104 ; CHECK-LABEL: test_select_cc_v4i32:
    105 ; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
    106 ; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
    107 ; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
    108 ; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
    109 ; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
    110   %cond = icmp eq i32 %a, %b
    111   %sel = select i1 %cond, <4 x i32> %c, <4 x i32> %d
    112   ret <4 x i32> %sel
    113 }
    114 
    115 define <1 x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1 x i64> %c, <1 x i64> %d) {
        ; An i64 equality chooses between two <1 x i64> operands. The expectations
        ; use a scalar cmeq on d registers whose result is the bsl mask; no dup.
    116 ; CHECK-LABEL: test_select_cc_v1i64:
    117 ; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
    118 ; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
    119 ; CHECK: cmeq d[[MASK:[0-9]+]], d[[LHS]], d[[RHS]]
    120 ; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
    121   %cond = icmp eq i64 %a, %b
    122   %sel = select i1 %cond, <1 x i64> %c, <1 x i64> %d
    123   ret <1 x i64> %sel
    124 }
    125 
    126 define <2 x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2 x i64> %c, <2 x i64> %d) {
        ; An i64 equality chooses between two <2 x i64> operands. The compare and
        ; splat are expected on .2d lanes, with bsl operating on .16b.
    127 ; CHECK-LABEL: test_select_cc_v2i64:
    128 ; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
    129 ; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
    130 ; CHECK: cmeq [[MASK:v[0-9]+]].2d, v[[LHS]].2d, v[[RHS]].2d
    131 ; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
    132 ; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
    133   %cond = icmp eq i64 %a, %b
    134   %sel = select i1 %cond, <2 x i64> %c, <2 x i64> %d
    135   ret <2 x i64> %sel
    136 }
    137 
    138 define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d) {
        ; Single-element float vector: the expectations use the fcmeq mask as the
        ; bsl operand on the very next line, with no dup in between.
    139 ; CHECK-LABEL: test_select_cc_v1f32:
    140 ; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
    141 ; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b
    142   %cond = fcmp oeq float %a, %b
    143   %sel = select i1 %cond, <1 x float> %c, <1 x float> %d
    144   ret <1 x float> %sel
    145 }
    146 
    147 define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d) {
        ; A float ordered-equal compare chooses between two <2 x float> operands:
        ; fcmeq on .2s, dup of lane 0, then bsl on .8b.
    148 ; CHECK-LABEL: test_select_cc_v2f32:
    149 ; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
    150 ; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
    151 ; CHECK: bsl [[DUPMASK]].8b, v2.8b, v3.8b
    152   %cond = fcmp oeq float %a, %b
    153   %sel = select i1 %cond, <2 x float> %c, <2 x float> %d
    154   ret <2 x float> %sel
    155 }
    156 
    157 define <4 x float> @test_select_cc_v4f32(float %a, float %b, <4 x float> %c, <4 x float> %d) {
        ; A float ordered-equal compare chooses between two <4 x float> operands:
        ; fcmeq on .4s, dup of lane 0, then bsl on .16b.
    158 ; CHECK-LABEL: test_select_cc_v4f32:
    159 ; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
    160 ; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
    161 ; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
    162   %cond = fcmp oeq float %a, %b
    163   %sel = select i1 %cond, <4 x float> %c, <4 x float> %d
    164   ret <4 x float> %sel
    165 }
    166 
    167 define <4 x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4 x float> %c, <4 x float> %d) {
        ; Integer condition, floating-point payload: an i32 equality chooses
        ; between two <4 x float> operands using cmeq/dup on .4s and bsl on .16b.
    168 ; CHECK-LABEL: test_select_cc_v4f32_icmp:
    169 ; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
    170 ; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
    171 ; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
    172 ; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
    173 ; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
    174   %cond = icmp eq i32 %a, %b
    175   %sel = select i1 %cond, <4 x float> %c, <4 x float> %d
    176   ret <4 x float> %sel
    177 }
    178 
    179 define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d) {
        ; Single-element double vector: a scalar fcmeq on d registers produces the
        ; mask that bsl is expected to use; no dup appears in the expectations.
    180 ; CHECK-LABEL: test_select_cc_v1f64:
    181 ; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
    182 ; CHECK: bsl v[[MASK]].8b, v2.8b, v3.8b
    183   %cond = fcmp oeq double %a, %b
    184   %sel = select i1 %cond, <1 x double> %c, <1 x double> %d
    185   ret <1 x double> %sel
    186 }
    187 
    188 define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d) {
        ; Integer condition, floating-point payload: an i64 equality chooses
        ; between two <1 x double> operands via a scalar cmeq on d registers.
    189 ; CHECK-LABEL: test_select_cc_v1f64_icmp:
    190 ; CHECK-DAG: fmov [[LHS:d[0-9]+]], x0
    191 ; CHECK-DAG: fmov [[RHS:d[0-9]+]], x1
    192 ; CHECK: cmeq d[[MASK:[0-9]+]], [[LHS]], [[RHS]]
    193 ; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
    194   %cond = icmp eq i64 %a, %b
    195   %sel = select i1 %cond, <1 x double> %c, <1 x double> %d
    196   ret <1 x double> %sel
    197 }
    198 
    199 define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d) {
        ; A double ordered-equal compare chooses between two <2 x double>
        ; operands: fcmeq on .2d, dup of lane 0, then bsl on .16b.
    200 ; CHECK-LABEL: test_select_cc_v2f64:
    201 ; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
    202 ; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
    203 ; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
    204   %cond = fcmp oeq double %a, %b
    205   %sel = select i1 %cond, <2 x double> %c, <2 x double> %d
    206   ret <2 x double> %sel
    207 }
    208 
    209 ; Special case (part of PR21549): if the select condition is itself an icmp
    210 ; on i1 operands, the comparison must stay scalar rather than being performed
    211 ; on vector registers.
    212 define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b) {
        ; Expected shape: test bit 0 of w0, materialize an all-ones/zero mask with
        ; csetm, splat that general-purpose register with dup, then bsl and a final
        ; move of the result into v0.
    213 ; CHECK-LABEL: test_select_cc_v2i32_icmpi1:
    214 ; CHECK: tst   w0, #0x1
    215 ; CHECK: csetm [[MASK:w[0-9]+]], ne
    216 ; CHECK: dup   [[DUPMASK:v[0-9]+]].2s, [[MASK]]
    217 ; CHECK: bsl   [[DUPMASK]].8b, v0.8b, v1.8b
    218 ; CHECK: mov   v0.16b, [[DUPMASK]].16b
    219   %cond = icmp ne i1 %cc, false
    220   %sel = select i1 %cond, <2 x i32> %a, <2 x i32> %b
    221   ret <2 x i32> %sel
    222 }
    223 
    224 ; Also make sure we support irregular/non-power-of-2 types such as v3f32.
        ; A float ordered-equal compare chooses between two <3 x float> operands,
        ; which the expectations treat as full .4s/.16b registers.
        ; bsl overwrites its mask register in place, so the bsl line must reuse the
        ; DUPMASK register captured on the dup line. Re-capturing it there would
        ; let any register match and leave the dup -> bsl data flow unverified.
    225 define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {
    226 ; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:
    227 ; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].4s, v2.4s, v3.4s
    228 ; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
    229 ; CHECK-NEXT: bsl [[DUPMASK]].16b, v0.16b, v1.16b
    230 ; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
    231 ; CHECK-NEXT: ret
    232   %cc = fcmp oeq float %c1, %c2
    233   %r = select i1 %cc, <3 x float> %a, <3 x float> %b
    234   ret <3 x float> %r
    235 }
    236 
    237 define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {
        ; A double ordered-equal compare chooses between two <3 x float> operands:
        ; fcmeq on .2d, dup of lane 0, bsl on .16b, then a move of the result to v0.
        ; bsl overwrites its mask register in place, so the bsl line must reuse the
        ; DUPMASK register captured on the dup line. Re-capturing it there would
        ; let any register match and leave the dup -> bsl data flow unverified.
    238 ; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:
    239 ; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].2d, v2.2d, v3.2d
    240 ; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
    241 ; CHECK-NEXT: bsl [[DUPMASK]].16b, v0.16b, v1.16b
    242 ; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
    243 ; CHECK-NEXT: ret
    244   %cc = fcmp oeq double %c1, %c2
    245   %r = select i1 %cc, <3 x float> %a, <3 x float> %b
    246   ret <3 x float> %r
    247 }
    248 
    249 attributes #0 = { nounwind } ; referenced by the two v3f32 test functions
    250