Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
      2 
      3 ; float16x4_t select_64(float16x4_t a, float16x4_t b, uint16x4_t c) { return vbsl_u16(c, a, b); }
      4 define <4 x half> @select_64(<4 x half> %a, <4 x half> %b, <4 x i16> %c) #0 {
        ; Bitwise select on <4 x half> operands, performed on their <4 x i16> bit patterns:
        ; result = (%a & %c) | (%b & ~%c).
        ; The directive below only requires that the substring "bsl" appears in the
        ; assembly emitted after this function's label.
        ; NOTE(review): attribute group #0 has no definition in the visible file - confirm this is intended.
      5 ; CHECK-LABEL: select_64:
      6 ; CHECK: bsl
      7 entry:
      8   %0 = bitcast <4 x half> %a to <4 x i16>
      9   %1 = bitcast <4 x half> %b to <4 x i16>
          ; Bits of %a kept where the mask %c is set.
     10   %vbsl3.i = and <4 x i16> %0, %c
          ; xor with all-ones produces the complement of the mask.
     11   %2 = xor <4 x i16> %c, <i16 -1, i16 -1, i16 -1, i16 -1>
          ; Bits of %b kept where the mask %c is clear.
     12   %vbsl4.i = and <4 x i16> %1, %2
     13   %vbsl5.i = or <4 x i16> %vbsl3.i, %vbsl4.i
     14   %3 = bitcast <4 x i16> %vbsl5.i to <4 x half>
     15   ret <4 x half> %3
     16 }
     17 
     18 ; float16x8_t select_128(float16x8_t a, float16x8_t b, uint16x8_t c) { return vbslq_u16(c, a, b); }
     19 define <8 x half> @select_128(<8 x half> %a, <8 x half> %b, <8 x i16> %c) #0 {
        ; Bitwise select on <8 x half> operands, performed on their <8 x i16> bit patterns:
        ; result = (%a & %c) | (%b & ~%c).
        ; The directive below only requires that the substring "bsl" appears in the
        ; assembly emitted after this function's label.
     20 ; CHECK-LABEL: select_128:
     21 ; CHECK: bsl
     22 entry:
     23   %0 = bitcast <8 x half> %a to <8 x i16>
     24   %1 = bitcast <8 x half> %b to <8 x i16>
          ; Bits of %a kept where the mask %c is set.
     25   %vbsl3.i = and <8 x i16> %0, %c
          ; xor with all-ones produces the complement of the mask.
     26   %2 = xor <8 x i16> %c, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
          ; Bits of %b kept where the mask %c is clear.
     27   %vbsl4.i = and <8 x i16> %1, %2
     28   %vbsl5.i = or <8 x i16> %vbsl3.i, %vbsl4.i
     29   %3 = bitcast <8 x i16> %vbsl5.i to <8 x half>
     30   ret <8 x half> %3
     31 }
     32 
     33 ; float16x4_t lane_64_64(float16x4_t a, float16x4_t b) {
     34 ;  return vcopy_lane_s16(a, 1, b, 2);
     35 ; }
     36 define <4 x half> @lane_64_64(<4 x half> %a, <4 x half> %b) #0 {
        ; Returns %a with lane 1 replaced by lane 2 of %b.
        ; In the shuffle mask, indices 0-3 select from %a and 4-7 select from %b,
        ; so index 6 in result position 1 is %b[2].
        ; The directive below expects text of the form "mov v<digits>.h".
     37 ; CHECK-LABEL: lane_64_64:
     38 ; CHECK: mov v{{[0-9]+}}.h
     39 entry:
     40   %0 = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
     41   ret <4 x half> %0
     42 }
     43 
     44 ; float16x8_t lane_128_64(float16x8_t a, float16x4_t b) {
     45 ;   return vcopyq_lane_s16(a, 1, b, 2);
     46 ; }
     47 define <8 x half> @lane_128_64(<8 x half> %a, <4 x half> %b) #0 {
        ; Returns the 8-lane %a with lane 1 replaced by lane 2 of the 4-lane %b.
        ; The element is moved as an i16 via extractelement/insertelement on the
        ; bitcast integer views of both vectors.
        ; The directive below expects text of the form "mov v<digits>.h".
     48 ; CHECK-LABEL: lane_128_64:
     49 ; CHECK: mov v{{[0-9]+}}.h
     50 entry:
     51   %0 = bitcast <4 x half> %b to <4 x i16>
          ; Source element: lane 2 of %b.
     52   %vget_lane = extractelement <4 x i16> %0, i32 2
     53   %1 = bitcast <8 x half> %a to <8 x i16>
          ; Destination: lane 1 of %a.
     54   %vset_lane = insertelement <8 x i16> %1, i16 %vget_lane, i32 1
     55   %2 = bitcast <8 x i16> %vset_lane to <8 x half>
     56   ret <8 x half> %2
     57 }
     58 
     59 ; float16x4_t lane_64_128(float16x4_t a, float16x8_t b) {
     60 ;   return vcopy_laneq_s16(a, 3, b, 5);
     61 ; }
     62 define <4 x half> @lane_64_128(<4 x half> %a, <8 x half> %b) #0 {
        ; Returns the 4-lane %a with lane 3 replaced by lane 5 of the 8-lane %b.
        ; The element is moved as an i16 via extractelement/insertelement on the
        ; bitcast integer views of both vectors.
        ; The directive below expects text of the form "mov v<digits>.h".
     63 ; CHECK-LABEL: lane_64_128:
     64 ; CHECK: mov v{{[0-9]+}}.h
     65 entry:
     66   %0 = bitcast <8 x half> %b to <8 x i16>
          ; Source element: lane 5 of %b.
     67   %vgetq_lane = extractelement <8 x i16> %0, i32 5
     68   %1 = bitcast <4 x half> %a to <4 x i16>
          ; Destination: lane 3 of %a.
     69   %vset_lane = insertelement <4 x i16> %1, i16 %vgetq_lane, i32 3
     70   %2 = bitcast <4 x i16> %vset_lane to <4 x half>
     71   ret <4 x half> %2
     72 }
     73 
     74 ; float16x8_t lane_128_128(float16x8_t a, float16x8_t b) {
     75 ;   return vcopyq_laneq_s16(a, 3, b, 5);
     76 ; }
     77 define <8 x half> @lane_128_128(<8 x half> %a, <8 x half> %b) #0 {
        ; Returns %a with lane 3 replaced by lane 5 of %b.
        ; In the shuffle mask, indices 0-7 select from %a and 8-15 select from %b,
        ; so index 13 in result position 3 is %b[5].
        ; The directive below expects text of the form "mov v<digits>.h".
     78 ; CHECK-LABEL: lane_128_128:
     79 ; CHECK: mov v{{[0-9]+}}.h
     80 entry:
     81   %0 = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 4, i32 5, i32 6, i32 7>
     82   ret <8 x half> %0
     83 }
     84 
     85 ; float16x4_t ext_64(float16x4_t a, float16x4_t b) {
     86 ;   return vext_s16(a, b, 3);
     87 ; }
     88 define <4 x half> @ext_64(<4 x half> %a, <4 x half> %b) #0 {
        ; Extracts four consecutive lanes from the concatenation of %a and %b,
        ; starting at lane 3: the result is { %a[3], %b[0], %b[1], %b[2] }.
        ; The directive below only requires that the substring "ext" appears in the
        ; assembly emitted after this function's label.
     89 ; CHECK-LABEL: ext_64:
     90 ; CHECK: ext
     91 entry:
     92   %0 = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
     93   ret <4 x half> %0
     94 }
     95 
     96 ; float16x8_t ext_128(float16x8_t a, float16x8_t b) {
     97 ;   return vextq_s16(a, b, 3);
     98 ; }
     99 define <8 x half> @ext_128(<8 x half> %a, <8 x half> %b) #0 {
        ; Extracts eight consecutive lanes from the concatenation of %a and %b,
        ; starting at lane 3: the result is %a[3..7] followed by %b[0..2].
        ; The directive below only requires that the substring "ext" appears in the
        ; assembly emitted after this function's label.
    100 ; CHECK-LABEL: ext_128:
    101 ; CHECK: ext
    102 entry:
    103   %0 = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
    104   ret <8 x half> %0
    105 }
    106 
    107 ; float16x4_t rev32_64(float16x4_t a) {
    108 ;   return vrev32_s16(a);
    109 ; }
    110 define <4 x half> @rev32_64(<4 x half> %a) #0 {
        ; Swaps each adjacent pair of lanes of %a (mask 1,0,3,2), i.e. reverses the
        ; 16-bit elements inside every 32-bit group. The second shuffle operand is
        ; undef and is never referenced by the mask.
        ; The directive below only requires that the substring "rev32" appears in the
        ; assembly emitted after this function's label.
    111 entry:
    112 ; CHECK-LABEL: rev32_64:
    113 ; CHECK: rev32
    114   %0 = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
    115   ret <4 x half> %0
    116 }
    117 
    118 ; float16x4_t rev64_64(float16x4_t a) {
    119 ;   return vrev64_s16(a);
    120 ; }
    121 define <4 x half> @rev64_64(<4 x half> %a) #0 {
        ; Reverses the order of all four lanes of %a (mask 3,2,1,0). The second
        ; shuffle operand is undef and is never referenced by the mask.
        ; The directive below only requires that the substring "rev64" appears in the
        ; assembly emitted after this function's label.
    122 entry:
    123 ; CHECK-LABEL: rev64_64:
    124 ; CHECK: rev64
    125   %0 = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    126   ret <4 x half> %0
    127 }
    128 
    129 ; float16x8_t rev32_128(float16x8_t a) {
    130 ;   return vrev32q_s16(a);
    131 ; }
    132 define <8 x half> @rev32_128(<8 x half> %a) #0 {
        ; Swaps each adjacent pair of lanes of %a (mask 1,0,3,2,5,4,7,6), i.e. reverses
        ; the 16-bit elements inside every 32-bit group. The second shuffle operand is
        ; undef and is never referenced by the mask.
        ; The directive below only requires that the substring "rev32" appears in the
        ; assembly emitted after this function's label.
    133 entry:
    134 ; CHECK-LABEL: rev32_128:
    135 ; CHECK: rev32
    136   %0 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
    137   ret <8 x half> %0
    138 }
    139 
    140 ; float16x8_t rev64_128(float16x8_t a) {
    141 ;   return vrev64q_s16(a);
    142 ; }
    143 define <8 x half> @rev64_128(<8 x half> %a) #0 {
        ; Reverses the lanes of %a within each group of four (mask 3,2,1,0,7,6,5,4),
        ; i.e. within each 64-bit half of the vector. The second shuffle operand is
        ; undef and is never referenced by the mask.
        ; The directive below only requires that the substring "rev64" appears in the
        ; assembly emitted after this function's label.
    144 entry:
    145 ; CHECK-LABEL: rev64_128:
    146 ; CHECK: rev64
    147   %0 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
    148   ret <8 x half> %0
    149 }
    150 
    151 ; float16x4_t create_64(long long a) { return vcreate_f16(a); }
    152 define <4 x half> @create_64(i64 %a) #0 {
        ; Reinterprets the 64 bits of the integer %a as a <4 x half> vector; the
        ; bitcast changes the type only, not the bit pattern.
        ; The directive below only requires that the substring "fmov" appears in the
        ; assembly emitted after this function's label.
    153 ; CHECK-LABEL: create_64:
    154 ; CHECK: fmov
    155 entry:
    156   %0 = bitcast i64 %a to <4 x half>
    157   ret <4 x half> %0
    158 }
    159 
    160 ; float16x4_t dup_64(__fp16 a) { return vdup_n_f16(a); }
    161 define <4 x half> @dup_64(half %a) #0 {
        ; Builds a <4 x half> splat of the scalar %a by inserting it into lanes 0-3
        ; of an initially undef vector, one lane per insertelement.
        ; The directive below only requires that the substring "dup" appears in the
        ; assembly emitted after this function's label.
    162 ; CHECK-LABEL: dup_64:
    163 ; CHECK: dup
    164 entry:
    165   %vecinit = insertelement <4 x half> undef, half %a, i32 0
    166   %vecinit1 = insertelement <4 x half> %vecinit, half %a, i32 1
    167   %vecinit2 = insertelement <4 x half> %vecinit1, half %a, i32 2
    168   %vecinit3 = insertelement <4 x half> %vecinit2, half %a, i32 3
    169   ret <4 x half> %vecinit3
    170 }
    171 
    172 ; float16x8_t dup_128(__fp16 a) { return vdupq_n_f16(a); }
    173 define <8 x half> @dup_128(half %a) #0 {
        ; Builds an <8 x half> splat of the scalar %a by inserting it into lanes 0-7
        ; of an initially undef vector, one lane per insertelement.
        ; The directive below only requires that the substring "dup" appears in the
        ; assembly emitted after this function's label.
    174 entry:
    175 ; CHECK-LABEL: dup_128:
    176 ; CHECK: dup
    177   %vecinit = insertelement <8 x half> undef, half %a, i32 0
    178   %vecinit1 = insertelement <8 x half> %vecinit, half %a, i32 1
    179   %vecinit2 = insertelement <8 x half> %vecinit1, half %a, i32 2
    180   %vecinit3 = insertelement <8 x half> %vecinit2, half %a, i32 3
    181   %vecinit4 = insertelement <8 x half> %vecinit3, half %a, i32 4
    182   %vecinit5 = insertelement <8 x half> %vecinit4, half %a, i32 5
    183   %vecinit6 = insertelement <8 x half> %vecinit5, half %a, i32 6
    184   %vecinit7 = insertelement <8 x half> %vecinit6, half %a, i32 7
    185   ret <8 x half> %vecinit7
    186 }
    187 
    188 ; float16x4_t dup_lane_64(float16x4_t a) { return vdup_lane_f16(a, 2); }
    189 define <4 x half> @dup_lane_64(<4 x half> %a) #0 {
        ; Broadcasts lane 2 of the 4-lane %a into every lane of a 4-lane result
        ; (every mask index is 2; the undef second operand is never referenced).
        ; The directive below only requires that the substring "dup" appears in the
        ; assembly emitted after this function's label.
    190 entry:
    191 ; CHECK-LABEL: dup_lane_64:
    192 ; CHECK: dup
    193   %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    194   ret <4 x half> %shuffle
    195 }
    196 
    197 ; float16x8_t dup_lane_128(float16x4_t a) { return vdupq_lane_f16(a, 2); }
    198 define <8 x half> @dup_lane_128(<4 x half> %a) #0 {
        ; Broadcasts lane 2 of the 4-lane %a into every lane of an 8-lane result;
        ; the 8-entry mask widens the output relative to the 4-lane inputs.
        ; The directive below only requires that the substring "dup" appears in the
        ; assembly emitted after this function's label.
    199 entry:
    200 ; CHECK-LABEL: dup_lane_128:
    201 ; CHECK: dup
    202   %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
    203   ret <8 x half> %shuffle
    204 }
    205 
    206 ; float16x4_t dup_laneq_64(float16x8_t a) { return vdup_laneq_f16(a, 2); }
    207 define <4 x half> @dup_laneq_64(<8 x half> %a) #0 {
        ; Broadcasts lane 2 of the 8-lane %a into every lane of a 4-lane result;
        ; the 4-entry mask narrows the output relative to the 8-lane inputs.
        ; The directive below only requires that the substring "dup" appears in the
        ; assembly emitted after this function's label.
    208 entry:
    209 ; CHECK-LABEL: dup_laneq_64:
    210 ; CHECK: dup
    211   %shuffle = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    212   ret <4 x half> %shuffle
    213 }
    214 
    215 ; float16x8_t dup_laneq_128(float16x8_t a) { return vdupq_laneq_f16(a, 2); }
    216 define <8 x half> @dup_laneq_128(<8 x half> %a) #0 {
        ; Broadcasts lane 2 of the 8-lane %a into every lane of an 8-lane result
        ; (every mask index is 2; the undef second operand is never referenced).
        ; The directive below only requires that the substring "dup" appears in the
        ; assembly emitted after this function's label.
    217 entry:
    218 ; CHECK-LABEL: dup_laneq_128:
    219 ; CHECK: dup
    220   %shuffle = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
    221   ret <8 x half> %shuffle
    222 }
    223 
    224 ; float16x8_t vcombine(float16x4_t a, float16x4_t b) { return vcombine_f16(a, b); }
    225 define <8 x half> @vcombine(<4 x half> %a, <4 x half> %b) #0 {
        ; Concatenates two 4-lane vectors into one 8-lane vector: the identity mask
        ; 0-7 places %a in result lanes 0-3 and %b in result lanes 4-7.
        ; The directive below spells out registers v0 and v1 literally, so it matches
        ; only when exactly that operand text is emitted.
    226 entry:
    227 ; CHECK-LABEL: vcombine:
    228 ; CHECK: mov v0.d[1], v1.d[0]
    229   %shuffle.i = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    230   ret <8 x half> %shuffle.i
    231 }
    232 
    233 ; float16x4_t get_high(float16x8_t a) { return vget_high_f16(a); }
    234 define <4 x half> @get_high(<8 x half> %a) #0 {
        ; Returns the upper four lanes (4-7) of the 8-lane %a as a 4-lane vector.
        ; The directive below only requires that the substring "ext" appears in the
        ; assembly emitted after this function's label.
    235 ; CHECK-LABEL: get_high:
    236 ; CHECK: ext
    237 entry:
    238   %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    239   ret <4 x half> %shuffle.i
    240 }
    241 
    242 
    243 ; float16x4_t get_low(float16x8_t a) { return vget_low_f16(a); }
    244 define <4 x half> @get_low(<8 x half> %a) #0 {
        ; Returns the lower four lanes (0-3) of the 8-lane %a as a 4-lane vector.
        ; The negative directive below asserts that the substring "ext" does not
        ; appear in the output region FileCheck associates with this function's label.
    245 ; CHECK-LABEL: get_low:
    246 ; CHECK-NOT: ext
    247 entry:
    248   %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    249   ret <4 x half> %shuffle.i
    250 }
    251 
    252 ; float16x4_t set_lane_64(float16x4_t a, __fp16 b) { return vset_lane_f16(b, a, 2); }
    253 define <4 x half> @set_lane_64(<4 x half> %a, half %b) #0 {
        ; Returns %a with lane 2 replaced by the scalar %b. Both the scalar and the
        ; vector are bitcast to i16 form, so the insert happens in the integer domain.
        ; The two directives below must match in order: first "fmov", then text of
        ; the form "mov v<digits>.h".
    254 ; CHECK-LABEL: set_lane_64:
    255 ; CHECK: fmov
    256 ; CHECK: mov v{{[0-9]+}}.h
    257 entry:
    258   %0 = bitcast half %b to i16
    259   %1 = bitcast <4 x half> %a to <4 x i16>
    260   %vset_lane = insertelement <4 x i16> %1, i16 %0, i32 2
    261   %2 = bitcast <4 x i16> %vset_lane to <4 x half>
    262   ret <4 x half> %2
    263 }
    264 
    265 
    266 ; float16x8_t set_lane_128(float16x8_t a, __fp16 b) { return vsetq_lane_f16(b, a, 2); }
    267 define <8 x half> @set_lane_128(<8 x half> %a, half %b) #0 {
        ; Returns %a with lane 2 replaced by the scalar %b. Both the scalar and the
        ; vector are bitcast to i16 form, so the insert happens in the integer domain.
        ; The two directives below must match in order: first "fmov", then text of
        ; the form "mov v<digits>.h".
    268 ; CHECK-LABEL: set_lane_128:
    269 ; CHECK: fmov
    270 ; CHECK: mov v{{[0-9]+}}.h
    271 entry:
    272   %0 = bitcast half %b to i16
    273   %1 = bitcast <8 x half> %a to <8 x i16>
    274   %vset_lane = insertelement <8 x i16> %1, i16 %0, i32 2
    275   %2 = bitcast <8 x i16> %vset_lane to <8 x half>
    276   ret <8 x half> %2
    277 }
    278 
    279 ; __fp16 get_lane_64(float16x4_t a) { return vget_lane_f16(a, 2); }
    280 define half @get_lane_64(<4 x half> %a) #0 {
        ; Returns lane 2 of %a as a scalar half. The element is extracted from the
        ; <4 x i16> view of the vector and then bitcast from i16 back to half.
        ; The two directives below must match in order: first "umov", then "fmov".
    281 ; CHECK-LABEL: get_lane_64:
    282 ; CHECK: umov
    283 ; CHECK: fmov
    284 entry:
    285   %0 = bitcast <4 x half> %a to <4 x i16>
    286   %vget_lane = extractelement <4 x i16> %0, i32 2
    287   %1 = bitcast i16 %vget_lane to half
    288   ret half %1
    289 }
    290 
    291 ; __fp16 get_lane_128(float16x8_t a) { return vgetq_lane_f16(a, 2); }
    292 define half @get_lane_128(<8 x half> %a) #0 {
        ; Returns lane 2 of %a as a scalar half. The element is extracted from the
        ; <8 x i16> view of the vector and then bitcast from i16 back to half.
        ; The two directives below must match in order: first "umov", then "fmov".
    293 ; CHECK-LABEL: get_lane_128:
    294 ; CHECK: umov
    295 ; CHECK: fmov
    296 entry:
    297   %0 = bitcast <8 x half> %a to <8 x i16>
    298   %vgetq_lane = extractelement <8 x i16> %0, i32 2
    299   %1 = bitcast i16 %vgetq_lane to half
    300   ret half %1
    301 }
    302