; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; TODO: Add AVX512BW shift support
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ

;
; Variable Shifts
;

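; Note: with -mcpu=knl the target has AVX512F/AVX512DQ but not AVX512BW, so
; only the i64 and i32 element shifts use 512-bit vpsllv* instructions; the
; i16 and i8 cases below are emulated on 256-bit halves (hence the TODO above).
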
define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, %b
  ret <16 x i32> %shift
}

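; Without AVX512BW there is no variable i16 shift, so each 256-bit half is
; widened to i32 lanes with vpunpck{l,h}wd, shifted with vpsllvd, and narrowed
; back with vpsrld $16 / vpackusdw.
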
define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; ALL-LABEL: var_shift_v32i16:
; ALL:       ## BB#0:
; ALL-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; ALL-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15]
; ALL-NEXT:    vpunpckhwd {{.*#+}} ymm6 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; ALL-NEXT:    vpsllvd %ymm5, %ymm6, %ymm5
; ALL-NEXT:    vpsrld $16, %ymm5, %ymm5
; ALL-NEXT:    vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11]
; ALL-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; ALL-NEXT:    vpsllvd %ymm2, %ymm0, %ymm0
; ALL-NEXT:    vpsrld $16, %ymm0, %ymm0
; ALL-NEXT:    vpackusdw %ymm5, %ymm0, %ymm0
; ALL-NEXT:    vpunpckhwd {{.*#+}} ymm2 = ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15]
; ALL-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; ALL-NEXT:    vpsllvd %ymm2, %ymm5, %ymm2
; ALL-NEXT:    vpsrld $16, %ymm2, %ymm2
; ALL-NEXT:    vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11]
; ALL-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; ALL-NEXT:    vpsllvd %ymm3, %ymm1, %ymm1
; ALL-NEXT:    vpsrld $16, %ymm1, %ymm1
; ALL-NEXT:    vpackusdw %ymm2, %ymm1, %ymm1
; ALL-NEXT:    retq
  %shift = shl <32 x i16> %a, %b
  ret <32 x i16> %shift
}

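; There is no variable byte shift either, so v64i8 uses the usual AVX2 trick
; on each 256-bit half: the shift amounts are moved into the byte sign bits
; with vpsllw $5, then x<<4, x<<2 and x+x are conditionally applied with
; vpblendvb.
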
define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; ALL-LABEL: var_shift_v64i8:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllw $4, %ymm0, %ymm4
; ALL-NEXT:    vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; ALL-NEXT:    vpand %ymm5, %ymm4, %ymm4
; ALL-NEXT:    vpsllw $5, %ymm2, %ymm2
; ALL-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; ALL-NEXT:    vpsllw $2, %ymm0, %ymm4
; ALL-NEXT:    vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; ALL-NEXT:    vpand %ymm6, %ymm4, %ymm4
; ALL-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; ALL-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; ALL-NEXT:    vpaddb %ymm0, %ymm0, %ymm4
; ALL-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; ALL-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; ALL-NEXT:    vpsllw $4, %ymm1, %ymm2
; ALL-NEXT:    vpand %ymm5, %ymm2, %ymm2
; ALL-NEXT:    vpsllw $5, %ymm3, %ymm3
; ALL-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; ALL-NEXT:    vpsllw $2, %ymm1, %ymm2
; ALL-NEXT:    vpand %ymm6, %ymm2, %ymm2
; ALL-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; ALL-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; ALL-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; ALL-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; ALL-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; ALL-NEXT:    retq
  %shift = shl <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

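; With a splatted variable count the 512-bit shifts can use the scalar-count
; forms (vpsllq/vpslld with an xmm count); the i32 case first zeroes the upper
; elements of the count register so only the bottom dword is read.
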
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; ALL-NEXT:    vmovss %xmm1, %xmm2, %xmm1
; ALL-NEXT:    vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

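; For v32i16 the splatted word count is zero-extended through a GPR
; (vmovd + movzwl) and the same vpsllw count is applied to both 256-bit halves.
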
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; ALL-LABEL: splatvar_shift_v32i16:
; ALL:       ## BB#0:
; ALL-NEXT:    vmovd %xmm2, %eax
; ALL-NEXT:    movzwl %ax, %eax
; ALL-NEXT:    vmovd %eax, %xmm2
; ALL-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
; ALL-NEXT:    vpsllw %xmm2, %ymm1, %ymm1
; ALL-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = shl <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

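; For v64i8 the count is re-broadcast with vpbroadcastb and the byte blend
; sequence is repeated, though the vpsllw $5 selector and its vpaddb doublings
; are shared between the two halves.
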
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; ALL-LABEL: splatvar_shift_v64i8:
; ALL:       ## BB#0:
; ALL-NEXT:    vpbroadcastb %xmm2, %ymm2
; ALL-NEXT:    vpsllw $4, %ymm0, %ymm3
; ALL-NEXT:    vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; ALL-NEXT:    vpand %ymm4, %ymm3, %ymm3
; ALL-NEXT:    vpsllw $5, %ymm2, %ymm2
; ALL-NEXT:    vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
; ALL-NEXT:    vpsllw $2, %ymm0, %ymm3
; ALL-NEXT:    vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; ALL-NEXT:    vpand %ymm5, %ymm3, %ymm3
; ALL-NEXT:    vpaddb %ymm2, %ymm2, %ymm6
; ALL-NEXT:    vpblendvb %ymm6, %ymm3, %ymm0, %ymm0
; ALL-NEXT:    vpaddb %ymm0, %ymm0, %ymm3
; ALL-NEXT:    vpaddb %ymm6, %ymm6, %ymm7
; ALL-NEXT:    vpblendvb %ymm7, %ymm3, %ymm0, %ymm0
; ALL-NEXT:    vpsllw $4, %ymm1, %ymm3
; ALL-NEXT:    vpand %ymm4, %ymm3, %ymm3
; ALL-NEXT:    vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
; ALL-NEXT:    vpsllw $2, %ymm1, %ymm2
; ALL-NEXT:    vpand %ymm5, %ymm2, %ymm2
; ALL-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; ALL-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; ALL-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
; ALL-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = shl <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

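; Constant per-element counts still use vpsllvq/vpsllvd for i64/i32, with the
; shift vector loaded rip-relative from the constant pool.
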
define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllvq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllvd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

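; A constant v32i16 shift becomes a vpmullw by powers of two on each 256-bit
; half.
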
define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; ALL-LABEL: constant_shift_v32i16:
; ALL:       ## BB#0:
; ALL-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; ALL-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
; ALL-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
; ALL-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

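; The constant v64i8 case falls back to the same vpblendvb sequence as the
; variable case, with the per-byte amounts materialized as a constant vector
; and pre-shifted by vpsllw $5.
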
define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; ALL-LABEL: constant_shift_v64i8:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllw $4, %ymm0, %ymm2
; ALL-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; ALL-NEXT:    vpand %ymm3, %ymm2, %ymm2
; ALL-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; ALL-NEXT:    vpsllw $5, %ymm4, %ymm4
; ALL-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; ALL-NEXT:    vpsllw $2, %ymm0, %ymm2
; ALL-NEXT:    vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; ALL-NEXT:    vpand %ymm5, %ymm2, %ymm2
; ALL-NEXT:    vpaddb %ymm4, %ymm4, %ymm6
; ALL-NEXT:    vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; ALL-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; ALL-NEXT:    vpaddb %ymm6, %ymm6, %ymm7
; ALL-NEXT:    vpblendvb %ymm7, %ymm2, %ymm0, %ymm0
; ALL-NEXT:    vpsllw $4, %ymm1, %ymm2
; ALL-NEXT:    vpand %ymm3, %ymm2, %ymm2
; ALL-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; ALL-NEXT:    vpsllw $2, %ymm1, %ymm2
; ALL-NEXT:    vpand %ymm5, %ymm2, %ymm2
; ALL-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; ALL-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; ALL-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
; ALL-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

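; Uniform constant counts fold to the immediate shift forms (vpsllq $7,
; vpslld $5); the v32i16 case simply splits into two 256-bit vpsllw ops.
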
define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpslld $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v32i16:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllw $3, %ymm0, %ymm0
; ALL-NEXT:    vpsllw $3, %ymm1, %ymm1
; ALL-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

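; A uniform constant v64i8 shift is done as a 16-bit vpsllw on each half
; followed by a vpand with 248 (0xf8) to clear the bits shifted in from the
; neighbouring byte.
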
define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v64i8:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllw $3, %ymm0, %ymm0
; ALL-NEXT:    vmovdqa {{.*#+}} ymm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
; ALL-NEXT:    vpand %ymm2, %ymm0, %ymm0
; ALL-NEXT:    vpsllw $3, %ymm1, %ymm1
; ALL-NEXT:    vpand %ymm2, %ymm1, %ymm1
; ALL-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}