Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
      2 
      3 ; CHECK-LABEL: variable_shl0:
        ; Per-lane variable left shift on <4 x i32>. The label confines the
        ; matches below to this function's output, and the mnemonic is spelled
        ; in full (VEX form) instead of the looser substring "psllvd".
      4 ; CHECK: vpsllvd
      5 ; CHECK: ret
      6 define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
      7   %k = shl <4 x i32> %x, %y
      8   ret <4 x i32> %k
      9 }
     10 ; CHECK-LABEL: variable_shl1:
        ; Per-lane variable left shift on <8 x i32>. The label confines the
        ; matches below to this function's output, and the mnemonic is spelled
        ; in full (VEX form) instead of the looser substring "psllvd".
     11 ; CHECK: vpsllvd
     12 ; CHECK: ret
     13 define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
     14   %k = shl <8 x i32> %x, %y
     15   ret <8 x i32> %k
     16 }
     17 ; CHECK-LABEL: variable_shl2:
        ; Per-lane variable left shift on <2 x i64>. The label confines the
        ; matches below to this function's output, and the mnemonic is spelled
        ; in full (VEX form) instead of the looser substring "psllvq".
     18 ; CHECK: vpsllvq
     19 ; CHECK: ret
     20 define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
     21   %k = shl <2 x i64> %x, %y
     22   ret <2 x i64> %k
     23 }
     24 ; CHECK-LABEL: variable_shl3:
        ; Per-lane variable left shift on <4 x i64>. The label confines the
        ; matches below to this function's output, and the mnemonic is spelled
        ; in full (VEX form) instead of the looser substring "psllvq".
     25 ; CHECK: vpsllvq
     26 ; CHECK: ret
     27 define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
     28   %k = shl <4 x i64> %x, %y
     29   ret <4 x i64> %k
     30 }
     31 ; CHECK-LABEL: variable_srl0:
        ; Per-lane variable logical right shift on <4 x i32>. The label
        ; confines the matches below to this function's output, and the
        ; mnemonic is spelled in full instead of the substring "psrlvd".
     32 ; CHECK: vpsrlvd
     33 ; CHECK: ret
     34 define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
     35   %k = lshr <4 x i32> %x, %y
     36   ret <4 x i32> %k
     37 }
     38 ; CHECK-LABEL: variable_srl1:
        ; Per-lane variable logical right shift on <8 x i32>. The label
        ; confines the matches below to this function's output, and the
        ; mnemonic is spelled in full instead of the substring "psrlvd".
     39 ; CHECK: vpsrlvd
     40 ; CHECK: ret
     41 define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
     42   %k = lshr <8 x i32> %x, %y
     43   ret <8 x i32> %k
     44 }
     45 ; CHECK-LABEL: variable_srl2:
        ; Per-lane variable logical right shift on <2 x i64>. The label
        ; confines the matches below to this function's output, and the
        ; mnemonic is spelled in full instead of the substring "psrlvq".
     46 ; CHECK: vpsrlvq
     47 ; CHECK: ret
     48 define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
     49   %k = lshr <2 x i64> %x, %y
     50   ret <2 x i64> %k
     51 }
     52 ; CHECK-LABEL: variable_srl3:
        ; Per-lane variable logical right shift on <4 x i64>. The label
        ; confines the matches below to this function's output, and the
        ; mnemonic is spelled in full instead of the substring "psrlvq".
     53 ; CHECK: vpsrlvq
     54 ; CHECK: ret
     55 define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
     56   %k = lshr <4 x i64> %x, %y
     57   ret <4 x i64> %k
     58 }
     59 
     60 ; CHECK-LABEL: variable_sra0:
        ; Per-lane variable arithmetic right shift on <4 x i32>. The label
        ; confines the matches below to this function's output.
     61 ; CHECK: vpsravd
     62 ; CHECK: ret
     63 define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
     64   %k = ashr <4 x i32> %x, %y
     65   ret <4 x i32> %k
     66 }
     67 ; CHECK-LABEL: variable_sra1:
        ; Per-lane variable arithmetic right shift on <8 x i32>. The label
        ; confines the matches below to this function's output.
     68 ; CHECK: vpsravd
     69 ; CHECK: ret
     70 define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
     71   %k = ashr <8 x i32> %x, %y
     72   ret <8 x i32> %k
     73 }
     74 
     75 ;;; Shift left
     76 ; CHECK-LABEL: vshift00:
        ; Left shift of <8 x i32> by a uniform constant 2. The label was added
        ; so the mnemonic below must appear in this function's output rather
        ; than anywhere later in the file.
        ; CHECK: vpslld
     77 define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
     78   %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
     79 2>
     80   ret <8 x i32> %s
     81 }
     82 
     83 ; CHECK-LABEL: vshift01:
        ; Left shift of <16 x i16> by a uniform constant 2. The label was added
        ; so the mnemonic below must appear in this function's output rather
        ; than anywhere later in the file.
        ; CHECK: vpsllw
     84 define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
     85   %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
     86   ret <16 x i16> %s
     87 }
     88 
     89 ; CHECK-LABEL: vshift02:
        ; Left shift of <4 x i64> by a uniform constant 2. The label was added
        ; so the mnemonic below must appear in this function's output rather
        ; than anywhere later in the file.
        ; CHECK: vpsllq
     90 define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
     91   %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
     92   ret <4 x i64> %s
     93 }
     94 
     95 ;;; Logical Shift right
     96 ; CHECK-LABEL: vshift03:
        ; Logical right shift of <8 x i32> by a uniform constant 2. The label
        ; was added so the mnemonic below must appear in this function's
        ; output rather than anywhere later in the file.
        ; CHECK: vpsrld
     97 define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
     98   %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
     99 2>
    100   ret <8 x i32> %s
    101 }
    102 
    103 ; CHECK-LABEL: vshift04:
        ; Logical right shift of <16 x i16> by a uniform constant 2. The label
        ; was added so the mnemonic below must appear in this function's
        ; output rather than anywhere later in the file.
        ; CHECK: vpsrlw
    104 define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
    105   %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
    106   ret <16 x i16> %s
    107 }
    108 
    109 ; CHECK-LABEL: vshift05:
        ; Logical right shift of <4 x i64> by a uniform constant 2. The label
        ; was added so the mnemonic below must appear in this function's
        ; output rather than anywhere later in the file.
        ; CHECK: vpsrlq
    110 define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
    111   %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
    112   ret <4 x i64> %s
    113 }
    114 
    115 ;;; Arithmetic Shift right
    116 ; CHECK-LABEL: vshift06:
        ; Arithmetic right shift of <8 x i32> by a uniform constant 2. The
        ; label was added so the mnemonic below must appear in this function's
        ; output rather than anywhere later in the file.
        ; CHECK: vpsrad
    117 define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
    118   %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
    119 2>
    120   ret <8 x i32> %s
    121 }
    122 
    123 ; CHECK-LABEL: vshift07:
        ; Arithmetic right shift of <16 x i16> by a uniform constant 2. The
        ; label was added so the mnemonic below must appear in this function's
        ; output rather than anywhere later in the file.
        ; CHECK: vpsraw
    124 define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
    125   %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
    126   ret <16 x i16> %s
    127 }
    128 
    129 ; CHECK-LABEL: variable_sra0_load:
        ; Arithmetic right shift of <4 x i32> where the shift amounts are
        ; loaded from %y. The pattern below requires the shift instruction to
        ; take a memory operand directly ("(%" in AT&T syntax).
    130 ; CHECK: vpsravd (%
    131 ; CHECK: ret
    132 define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
    133   %y1 = load <4 x i32>, <4 x i32>* %y
    134   %k = ashr <4 x i32> %x, %y1
    135   ret <4 x i32> %k
    136 }
    137 
    138 ; CHECK-LABEL: variable_sra1_load:
        ; Arithmetic right shift of <8 x i32> where the shift amounts are
        ; loaded from %y. The pattern below requires the shift instruction to
        ; take a memory operand directly ("(%" in AT&T syntax).
    139 ; CHECK: vpsravd (%
    140 ; CHECK: ret
    141 define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
    142   %y1 = load <8 x i32>, <8 x i32>* %y
    143   %k = ashr <8 x i32> %x, %y1
    144   ret <8 x i32> %k
    145 }
    146 
    147 ; CHECK-LABEL: variable_shl0_load:
        ; Left shift of <4 x i32> where the shift amounts are loaded from %y.
        ; The pattern below requires the shift instruction to take a memory
        ; operand directly ("(%" in AT&T syntax).
    148 ; CHECK: vpsllvd (%
    149 ; CHECK: ret
    150 define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
    151   %y1 = load <4 x i32>, <4 x i32>* %y
    152   %k = shl <4 x i32> %x, %y1
    153   ret <4 x i32> %k
    154 }
    155 ; CHECK-LABEL: variable_shl1_load:
        ; Left shift of <8 x i32> where the shift amounts are loaded from %y.
        ; The pattern below requires the shift instruction to take a memory
        ; operand directly ("(%" in AT&T syntax).
    156 ; CHECK: vpsllvd (%
    157 ; CHECK: ret
    158 define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
    159   %y1 = load <8 x i32>, <8 x i32>* %y
    160   %k = shl <8 x i32> %x, %y1
    161   ret <8 x i32> %k
    162 }
    163 ; CHECK-LABEL: variable_shl2_load:
        ; Left shift of <2 x i64> where the shift amounts are loaded from %y.
        ; The pattern below requires the shift instruction to take a memory
        ; operand directly ("(%" in AT&T syntax).
    164 ; CHECK: vpsllvq (%
    165 ; CHECK: ret
    166 define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
    167   %y1 = load <2 x i64>, <2 x i64>* %y
    168   %k = shl <2 x i64> %x, %y1
    169   ret <2 x i64> %k
    170 }
    171 ; CHECK-LABEL: variable_shl3_load:
        ; Left shift of <4 x i64> where the shift amounts are loaded from %y.
        ; The pattern below requires the shift instruction to take a memory
        ; operand directly ("(%" in AT&T syntax).
    172 ; CHECK: vpsllvq (%
    173 ; CHECK: ret
    174 define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
    175   %y1 = load <4 x i64>, <4 x i64>* %y
    176   %k = shl <4 x i64> %x, %y1
    177   ret <4 x i64> %k
    178 }
    179 ; CHECK-LABEL: variable_srl0_load:
        ; Logical right shift of <4 x i32> where the shift amounts are loaded
        ; from %y. The pattern below requires the shift instruction to take a
        ; memory operand directly ("(%" in AT&T syntax).
    180 ; CHECK: vpsrlvd (%
    181 ; CHECK: ret
    182 define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
    183   %y1 = load <4 x i32>, <4 x i32>* %y
    184   %k = lshr <4 x i32> %x, %y1
    185   ret <4 x i32> %k
    186 }
    187 ; CHECK-LABEL: variable_srl1_load:
        ; Logical right shift of <8 x i32> where the shift amounts are loaded
        ; from %y. The pattern below requires the shift instruction to take a
        ; memory operand directly ("(%" in AT&T syntax).
    188 ; CHECK: vpsrlvd (%
    189 ; CHECK: ret
    190 define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
    191   %y1 = load <8 x i32>, <8 x i32>* %y
    192   %k = lshr <8 x i32> %x, %y1
    193   ret <8 x i32> %k
    194 }
    195 ; CHECK-LABEL: variable_srl2_load:
        ; Logical right shift of <2 x i64> where the shift amounts are loaded
        ; from %y. The pattern below requires the shift instruction to take a
        ; memory operand directly ("(%" in AT&T syntax).
    196 ; CHECK: vpsrlvq (%
    197 ; CHECK: ret
    198 define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
    199   %y1 = load <2 x i64>, <2 x i64>* %y
    200   %k = lshr <2 x i64> %x, %y1
    201   ret <2 x i64> %k
    202 }
    203 ; CHECK-LABEL: variable_srl3_load:
        ; Logical right shift of <4 x i64> where the shift amounts are loaded
        ; from %y. The pattern below requires the shift instruction to take a
        ; memory operand directly ("(%" in AT&T syntax).
    204 ; CHECK: vpsrlvq (%
    205 ; CHECK: ret
    206 define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
    207   %y1 = load <4 x i64>, <4 x i64>* %y
    208   %k = lshr <4 x i64> %x, %y1
    209   ret <4 x i64> %k
    210 }
    211 
    212 define <32 x i8> @shl9(<32 x i8> %A) nounwind {
        ; Left shift of every i8 lane by the constant 3. The expectations
        ; listed after the return are a 16-bit-lane shift by 3 followed by a
        ; vpand.
    213   %shifted = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    214   ret <32 x i8> %shifted
    215 ; CHECK-LABEL: shl9:
    216 ; CHECK: vpsllw $3
    217 ; CHECK: vpand
    218 ; CHECK: ret
    219 }
    220 
    221 define <32 x i8> @shr9(<32 x i8> %A) nounwind {
        ; Logical right shift of every i8 lane by the constant 3. The
        ; expectations listed after the return are a 16-bit-lane shift by 3
        ; followed by a vpand.
    222   %shifted = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    223   ret <32 x i8> %shifted
    224 ; CHECK-LABEL: shr9:
    225 ; CHECK: vpsrlw $3
    226 ; CHECK: vpand
    227 ; CHECK: ret
    228 }
    229 
    230 define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
        ; Arithmetic right shift of every i8 lane by 7. The expectations
        ; listed after the return are vpxor followed by vpcmpgtb, i.e. no
        ; shift instruction is required in the output.
        ; NOTE(review): presumably this is a signed compare against a zeroed
        ; register producing the sign mask - confirm against llc output.
    231   %signs = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
    232   ret <32 x i8> %signs
    233 ; CHECK-LABEL: sra_v32i8_7:
    234 ; CHECK: vpxor
    235 ; CHECK: vpcmpgtb
    236 ; CHECK: ret
    237 }
    238 
    239 define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
        ; Arithmetic right shift of every i8 lane by the constant 3. The
        ; expectations listed after the return are a four-instruction
        ; sequence: vpsrlw $3, vpand, vpxor, vpsubb.
    240   %shifted = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    241   ret <32 x i8> %shifted
    242 ; CHECK-LABEL: sra_v32i8:
    243 ; CHECK: vpsrlw $3
    244 ; CHECK: vpand
    245 ; CHECK: vpxor
    246 ; CHECK: vpsubb
    247 ; CHECK: ret
    248 }
    249 
    250 ; CHECK-LABEL: sext_v16i16:
        ; Truncate each i16 lane to i8 and sign-extend it back. The expected
        ; output is a shift-left / arithmetic-shift-right pair with no
        ; vinsertf128. The label replaces a plain match that hard-coded the
        ; Darwin "_" symbol prefix.
        ; NOTE(review): the vinsertf128 exclusion presumably guards against
        ; the operation being split into 128-bit halves - confirm.
    251 ; CHECK: vpsllw
    252 ; CHECK: vpsraw
    253 ; CHECK-NOT: vinsertf128
    254 define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
    255   %b = trunc <16 x i16> %a to <16 x i8>
    256   %c = sext <16 x i8> %b to <16 x i16>
    257   ret <16 x i16> %c
    258 }
    259 
    260 ; CHECK-LABEL: sext_v8i32:
        ; Truncate each i32 lane to i16 and sign-extend it back. The expected
        ; output is a shift-left / arithmetic-shift-right pair with no
        ; vinsertf128. The label replaces a plain match that hard-coded the
        ; Darwin "_" symbol prefix.
        ; NOTE(review): the vinsertf128 exclusion presumably guards against
        ; the operation being split into 128-bit halves - confirm.
    261 ; CHECK: vpslld
    262 ; CHECK: vpsrad
    263 ; CHECK-NOT: vinsertf128
    264 define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
    265   %b = trunc <8 x i32> %a to <8 x i16>
    266   %c = sext <8 x i16> %b to <8 x i32>
    267   ret <8 x i32> %c
    268 }
    269 
    270 define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
        ; Per-lane variable left shift on i16 lanes. The expected sequence
        ; zero-extends both operands into ymm registers (vpmovzxwd, in either
        ; order), shifts with vpsllvd, then emits vpshufb and vpermq.
    271 ; CHECK-LABEL: variable_shl16:
    272 ; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
    273 ; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
    274 ; CHECK: vpsllvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
    275 ; CHECK: vpshufb
    276 ; CHECK: vpermq
    277   %shifted = shl <8 x i16> %lhs, %rhs
    278   ret <8 x i16> %shifted
    279 }
    280 
    281 define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
        ; Per-lane variable arithmetic right shift on i16 lanes. The expected
        ; sequence zero-extends the amounts (vpmovzxwd) and sign-extends the
        ; value (vpmovsxwd) into ymm registers, shifts with vpsravd, then
        ; emits vpshufb and vpermq.
    282 ; CHECK-LABEL: variable_ashr16:
    283 ; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
    284 ; CHECK-DAG: vpmovsxwd %xmm0, [[LHS:%ymm[0-9]+]]
    285 ; CHECK: vpsravd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
    286 ; CHECK: vpshufb
    287 ; CHECK: vpermq
    288   %shifted = ashr <8 x i16> %lhs, %rhs
    289   ret <8 x i16> %shifted
    290 }
    291 
    292 define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
        ; Per-lane variable logical right shift on i16 lanes. The expected
        ; sequence zero-extends both operands into ymm registers (vpmovzxwd,
        ; in either order), shifts with vpsrlvd, then emits vpshufb and vpermq.
    293 ; CHECK-LABEL: variable_lshr16:
    294 ; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
    295 ; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
    296 ; CHECK: vpsrlvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
    297 ; CHECK: vpshufb
    298 ; CHECK: vpermq
    299   %shifted = lshr <8 x i16> %lhs, %rhs
    300   ret <8 x i16> %shifted
    301 }