Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefixes=ALL,KNL %s
      3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefixes=ALL,SKX %s
      4 
      5 target triple = "x86_64-unknown-unknown"
      6 
      7 define <32 x i16> @shuffle_v32i16(<32 x i16> %a)  {
        ; Splat test: the all-zero shuffle mask copies element 0 of %a into
        ; every one of the 32 result lanes.
        ; The KNL expectations are a 256-bit vpbroadcastw followed by a ymm
        ; copy into the second result register; the SKX expectations are a
        ; single 512-bit vpbroadcastw.
      8 ; KNL-LABEL: shuffle_v32i16:
      9 ; KNL:       ## %bb.0:
     10 ; KNL-NEXT:    vpbroadcastw %xmm0, %ymm0
     11 ; KNL-NEXT:    vmovdqa %ymm0, %ymm1
     12 ; KNL-NEXT:    retq
     13 ;
     14 ; SKX-LABEL: shuffle_v32i16:
     15 ; SKX:       ## %bb.0:
     16 ; SKX-NEXT:    vpbroadcastw %xmm0, %zmm0
     17 ; SKX-NEXT:    retq
     18   %c = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> zeroinitializer
     19   ret <32 x i16> %c
     20 }
     21 
     22 define <32 x i16> @shuffle_v32i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08(<32 x i16> %a)  {
        ; Splat of a non-zero element: every mask index is 8, so element 8
        ; of %a is copied into all 32 result lanes.
        ; Both sets of expectations first extract the upper 128 bits of ymm0
        ; (vextracti128 $1) and then broadcast the low word of that half.
     23 ; KNL-LABEL: shuffle_v32i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
     24 ; KNL:       ## %bb.0:
     25 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
     26 ; KNL-NEXT:    vpbroadcastw %xmm0, %ymm0
     27 ; KNL-NEXT:    vmovdqa %ymm0, %ymm1
     28 ; KNL-NEXT:    retq
     29 ;
     30 ; SKX-LABEL: shuffle_v32i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
     31 ; SKX:       ## %bb.0:
     32 ; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
     33 ; SKX-NEXT:    vpbroadcastw %xmm0, %zmm0
     34 ; SKX-NEXT:    retq
     35   %c = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
     36   ret <32 x i16> %c
     37 }
     38 
     39 define <32 x i16> @shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f(<32 x i16> %a)  {
        ; Single-input shuffle with undef lanes. The mask is the same
        ; 16-index pattern written twice, except that the very last index is
        ; 31 where the first copy has 1. The indices in the function name are
        ; written in hexadecimal (0a = 10, 1f = 31).
        ; The SKX expectations are one vpermw driven by a constant index
        ; vector; the KNL expectations are a vpshufb/vpermq/vpblendvb
        ; sequence that builds each 256-bit half separately.
     40 ; KNL-LABEL: shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f:
     41 ; KNL:       ## %bb.0:
     42 ; KNL-NEXT:    vpshufb {{.*#+}} ymm2 = ymm0[4,5,10,11,4,5,6,7,14,15,2,3,4,5,2,3,20,21,26,27,20,21,22,23,30,31,18,19,20,21,18,19]
     43 ; KNL-NEXT:    vpermq {{.*#+}} ymm3 = ymm0[2,3,0,1]
     44 ; KNL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm3[0,1,10,11,8,9,8,9,14,15,2,3,4,5,2,3,16,17,26,27,24,25,24,25,30,31,18,19,20,21,18,19]
     45 ; KNL-NEXT:    vmovdqa {{.*#+}} ymm4 = <0,0,0,0,u,u,u,u,0,0,u,u,255,255,0,0,255,255,255,255,u,u,255,255,255,255,u,u,0,0,255,255>
     46 ; KNL-NEXT:    vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
     47 ; KNL-NEXT:    vpshufb {{.*#+}} ymm3 = ymm3[0,1,10,11,8,9,8,9,14,15,6,7,4,5,14,15,16,17,26,27,24,25,24,25,30,31,22,23,20,21,30,31]
     48 ; KNL-NEXT:    vmovdqa {{.*#+}} ymm4 = <255,255,255,255,u,u,u,u,255,255,u,u,0,0,255,255,0,0,0,0,u,u,0,0,0,0,u,u,255,255,u,u>
     49 ; KNL-NEXT:    vpblendvb %ymm4, %ymm2, %ymm3, %ymm2
     50 ; KNL-NEXT:    vmovdqa {{.*#+}} ymm3 = <255,255,255,255,u,u,u,u,255,255,u,u,255,255,255,255,255,255,255,255,u,u,255,255,255,255,u,u,255,255,0,0>
     51 ; KNL-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
     52 ; KNL-NEXT:    retq
     53 ;
     54 ; SKX-LABEL: shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f:
     55 ; SKX:       ## %bb.0:
     56 ; SKX-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1,2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,31>
     57 ; SKX-NEXT:    vpermw %zmm0, %zmm1, %zmm0
     58 ; SKX-NEXT:    retq
     59   %c = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1,  i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1, i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1,  i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 31>
     60   ret <32 x i16> %c
     61 }
     62 
     63 define <32 x i16> @shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_38(<32 x i16> %a, <32 x i16> %b)  {
        ; Two-operand shuffle in which every index except the last is below
        ; 32 and therefore selects from %a (mixing its low and high 16
        ; elements); only the final index, 56, selects from %b (element 24
        ; of %b). The mask is one 16-index pattern written twice, with that
        ; final index as the sole difference.
        ; The SKX expectations are a single vpermt2w with a constant index
        ; vector; the KNL expectations assemble each 256-bit half with
        ; vpermq/vpblendw/vpshufb and patch the last lane via vpblendvb.
        ; NOTE(review): the function name spells 1e (30) at positions 7 and
        ; 23, but the mask and the SKX index vector both use 28 there.
     64 ; KNL-LABEL: shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_38:
     65 ; KNL:       ## %bb.0:
     66 ; KNL-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
     67 ; KNL-NEXT:    vpblendw {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4,5],ymm1[6],ymm2[7],ymm1[8,9,10,11],ymm2[12,13],ymm1[14],ymm2[15]
     68 ; KNL-NEXT:    vpshufb {{.*#+}} ymm1 = ymm2[u,u,14,15,u,u,12,13,u,u,10,11,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,u,u]
     69 ; KNL-NEXT:    vpermq {{.*#+}} ymm4 = ymm0[2,3,0,1]
     70 ; KNL-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm4[5,6,7],ymm0[8,9,10,11,12],ymm4[13,14,15]
     71 ; KNL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,12,13,u,u,10,11,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17,u,u]
     72 ; KNL-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
     73 ; KNL-NEXT:    vextracti128 $1, %ymm3, %xmm3
     74 ; KNL-NEXT:    vpbroadcastw %xmm3, %ymm3
     75 ; KNL-NEXT:    vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
     76 ; KNL-NEXT:    vpblendvb %ymm4, %ymm1, %ymm3, %ymm1
     77 ; KNL-NEXT:    vpshufb {{.*#+}} ymm2 = ymm2[u,u,14,15,u,u,12,13,u,u,10,11,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17]
     78 ; KNL-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
     79 ; KNL-NEXT:    retq
     80 ;
     81 ; SKX-LABEL: shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_38:
     82 ; SKX:       ## %bb.0:
     83 ; SKX-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24,15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,56]
     84 ; SKX-NEXT:    vpermt2w %zmm1, %zmm2, %zmm0
     85 ; SKX-NEXT:    retq
     86   %c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24, i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 56>
     87   ret <32 x i16> %c
     88 }
     89 
     90 define <32 x i16> @shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u(<32 x i16> %a, <32 x i16> %b)  {
        ; Interleaves elements of %a and %b: lanes 0-7 are
        ; a[0],b[0],a[1],b[1],a[2],b[2],a[3],b[3] and lanes 8-11 are
        ; a[8],b[8],a[9],b[9]; the remaining 20 lanes are undef.
        ; Both sets of expectations are a single 256-bit vpunpcklwd (the
        ; second source is ymm2 for KNL and ymm1 for SKX).
        ; NOTE(review): the name says v16i32, but the operand and result
        ; types are <32 x i16>.
     91 ; KNL-LABEL: shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u:
     92 ; KNL:       ## %bb.0:
     93 ; KNL-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11]
     94 ; KNL-NEXT:    retq
     95 ;
     96 ; SKX-LABEL: shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u:
     97 ; SKX:       ## %bb.0:
     98 ; SKX-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
     99 ; SKX-NEXT:    retq
    100   %c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    101   ret <32 x i16> %c
    102 }
    103 
    104 define <32 x i16> @shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u(<32 x i16> %a, <32 x i16> %b)  {
        ; Interleaves elements of %a and %b: lanes 0-7 are
        ; a[4],b[4],a[5],b[5],a[6],b[6],a[7],b[7] and lanes 8-11 are
        ; a[12],b[12],a[13],b[13]; the remaining 20 lanes are undef.
        ; Both sets of expectations are a single 256-bit vpunpckhwd (the
        ; second source is ymm2 for KNL and ymm1 for SKX).
        ; NOTE(review): the name says v16i32, but the operand and result
        ; types are <32 x i16>.
    105 ; KNL-LABEL: shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u:
    106 ; KNL:       ## %bb.0:
    107 ; KNL-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15]
    108 ; KNL-NEXT:    retq
    109 ;
    110 ; SKX-LABEL: shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u:
    111 ; SKX:       ## %bb.0:
    112 ; SKX-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
    113 ; SKX-NEXT:    retq
    114   %c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    115   ret <32 x i16> %c
    116 }
    117 
    118 define <32 x i16> @shuffle_v32i16_1_z_3_z_5_z_7_z_9_z_11_z_13_z_15_z_17_z_19_z_21_z_23_z_25_z_27_z_29_z_31_z(<32 x i16> %a, <32 x i16> %b)  {
        ; Moves each odd element of %a into the even lane just below it and
        ; fills every odd lane with zero. Index 34 selects an element of the
        ; zeroinitializer second operand, so it always yields 0.
        ; The expectations are a logical right shift of each 32-bit element
        ; by 16 (vpsrld $16): once per ymm half for KNL, once on zmm for SKX.
        ; The %b parameter is declared but not used by the body.
    119 ; KNL-LABEL: shuffle_v32i16_1_z_3_z_5_z_7_z_9_z_11_z_13_z_15_z_17_z_19_z_21_z_23_z_25_z_27_z_29_z_31_z:
    120 ; KNL:       ## %bb.0:
    121 ; KNL-NEXT:    vpsrld $16, %ymm0, %ymm0
    122 ; KNL-NEXT:    vpsrld $16, %ymm1, %ymm1
    123 ; KNL-NEXT:    retq
    124 ;
    125 ; SKX-LABEL: shuffle_v32i16_1_z_3_z_5_z_7_z_9_z_11_z_13_z_15_z_17_z_19_z_21_z_23_z_25_z_27_z_29_z_31_z:
    126 ; SKX:       ## %bb.0:
    127 ; SKX-NEXT:    vpsrld $16, %zmm0, %zmm0
    128 ; SKX-NEXT:    retq
    129   %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 34, i32 3, i32 34, i32 5, i32 34, i32 7, i32 34, i32 9, i32 34, i32 11, i32 34, i32 13, i32 34, i32 15, i32 34, i32 17, i32 34, i32 19, i32 34, i32 21, i32 34, i32 23, i32 34, i32 25, i32 34, i32 27, i32 34, i32 29, i32 34, i32 31, i32 34>
    130   ret <32 x i16> %c
    131 }
    132 
    133 define <32 x i16> @shuffle_v32i16_z_0_z_2_z_4_z_6_z_8_z_10_z_12_z_14_z_16_z_18_z_20_z_22_z_24_z_26_z_28_z_30(<32 x i16> %a, <32 x i16> %b)  {
        ; Moves each even element of %a into the odd lane just above it and
        ; fills every even lane with zero. Index 34 selects an element of the
        ; zeroinitializer second operand, so it always yields 0.
        ; The expectations are a left shift of each 32-bit element by 16
        ; (vpslld $16): once per ymm half for KNL, once on zmm for SKX.
        ; The %b parameter is declared but not used by the body.
    134 ; KNL-LABEL: shuffle_v32i16_z_0_z_2_z_4_z_6_z_8_z_10_z_12_z_14_z_16_z_18_z_20_z_22_z_24_z_26_z_28_z_30:
    135 ; KNL:       ## %bb.0:
    136 ; KNL-NEXT:    vpslld $16, %ymm0, %ymm0
    137 ; KNL-NEXT:    vpslld $16, %ymm1, %ymm1
    138 ; KNL-NEXT:    retq
    139 ;
    140 ; SKX-LABEL: shuffle_v32i16_z_0_z_2_z_4_z_6_z_8_z_10_z_12_z_14_z_16_z_18_z_20_z_22_z_24_z_26_z_28_z_30:
    141 ; SKX:       ## %bb.0:
    142 ; SKX-NEXT:    vpslld $16, %zmm0, %zmm0
    143 ; SKX-NEXT:    retq
    144   %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 34, i32 0, i32 34, i32 2, i32 34, i32 4, i32 34, i32 6, i32 34, i32 8, i32 34, i32 10, i32 34, i32 12, i32 34, i32 14, i32 34, i32 16, i32 34, i32 18, i32 34, i32 20, i32 34, i32 22, i32 34, i32 24, i32 34, i32 26, i32 34, i32 28, i32 34, i32 30>
    145   ret <32 x i16> %c
    146 }
    147 
    148 define <32 x i16> @shuffle_v32i16_1_1_0_0_4_5_6_7_9_9_8_8_12_13_14_15_17_17_16_16_20_21_22_23_25_25_24_24_28_29_30_31(<32 x i16> %a, <32 x i16> %b)  {
        ; Within every group of eight elements of %a, the low four become
        ; elements 1,1,0,0 of that group and the high four stay in place.
        ; All mask indices are below 32, so the zeroinitializer second
        ; operand is never selected.
        ; The expectations are vpshuflw: once per ymm half for KNL, once on
        ; zmm for SKX. The %b parameter is declared but not used by the body.
    149 ; KNL-LABEL: shuffle_v32i16_1_1_0_0_4_5_6_7_9_9_8_8_12_13_14_15_17_17_16_16_20_21_22_23_25_25_24_24_28_29_30_31:
    150 ; KNL:       ## %bb.0:
    151 ; KNL-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15]
    152 ; KNL-NEXT:    vpshuflw {{.*#+}} ymm1 = ymm1[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15]
    153 ; KNL-NEXT:    retq
    154 ;
    155 ; SKX-LABEL: shuffle_v32i16_1_1_0_0_4_5_6_7_9_9_8_8_12_13_14_15_17_17_16_16_20_21_22_23_25_25_24_24_28_29_30_31:
    156 ; SKX:       ## %bb.0:
    157 ; SKX-NEXT:    vpshuflw {{.*#+}} zmm0 = zmm0[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15,17,17,16,16,20,21,22,23,25,25,24,24,28,29,30,31]
    158 ; SKX-NEXT:    retq
    159   %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
    160   ret <32 x i16> %c
    161 }
    162 
    163 define <32 x i16> @shuffle_v32i16_0_1_2_3_5_5_4_4_8_9_10_11_13_13_12_12_16_17_18_19_21_21_20_20_24_25_26_27_29_29_28_28(<32 x i16> %a, <32 x i16> %b)  {
        ; Within every group of eight elements of %a, the low four stay in
        ; place and the high four become elements 5,5,4,4 of that group.
        ; All mask indices are below 32, so the zeroinitializer second
        ; operand is never selected.
        ; The expectations are vpshufhw: once per ymm half for KNL, once on
        ; zmm for SKX. The %b parameter is declared but not used by the body.
    164 ; KNL-LABEL: shuffle_v32i16_0_1_2_3_5_5_4_4_8_9_10_11_13_13_12_12_16_17_18_19_21_21_20_20_24_25_26_27_29_29_28_28:
    165 ; KNL:       ## %bb.0:
    166 ; KNL-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12]
    167 ; KNL-NEXT:    vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12]
    168 ; KNL-NEXT:    retq
    169 ;
    170 ; SKX-LABEL: shuffle_v32i16_0_1_2_3_5_5_4_4_8_9_10_11_13_13_12_12_16_17_18_19_21_21_20_20_24_25_26_27_29_29_28_28:
    171 ; SKX:       ## %bb.0:
    172 ; SKX-NEXT:    vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12,16,17,18,19,21,21,20,20,24,25,26,27,29,29,28,28]
    173 ; SKX-NEXT:    retq
    174   %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
    175   ret <32 x i16> %c
    176 }
    177 
    178 define <32 x i16> @shuffle_v32i16_1_1_0_0_5_5_4_4_9_9_11_11_13_13_12_12_17_17_19_19_21_21_20_20_25_25_27_27_29_29_28_28(<32 x i16> %a, <32 x i16> %b)  {
        ; Within every group of eight elements of %a, the low four become
        ; elements 1,1,0,0 of that group and the high four become 5,5,4,4.
        ; All mask indices are below 32, so the zeroinitializer second
        ; operand is never selected.
        ; The KNL expectations are a vpshuflw followed by a vpshufhw on each
        ; ymm half; the SKX expectations are a single byte shuffle (vpshufb)
        ; on zmm. The %b parameter is declared but not used by the body.
        ; NOTE(review): the function name spells 11_11, 19_19 and 27_27, but
        ; the mask has 8,8 / 16,16 / 24,24 at those positions, and the
        ; expected instructions match the mask rather than the name.
    179 ; KNL-LABEL: shuffle_v32i16_1_1_0_0_5_5_4_4_9_9_11_11_13_13_12_12_17_17_19_19_21_21_20_20_25_25_27_27_29_29_28_28:
    180 ; KNL:       ## %bb.0:
    181 ; KNL-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15]
    182 ; KNL-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12]
    183 ; KNL-NEXT:    vpshuflw {{.*#+}} ymm1 = ymm1[1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15]
    184 ; KNL-NEXT:    vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12]
    185 ; KNL-NEXT:    retq
    186 ;
    187 ; SKX-LABEL: shuffle_v32i16_1_1_0_0_5_5_4_4_9_9_11_11_13_13_12_12_17_17_19_19_21_21_20_20_25_25_27_27_29_29_28_28:
    188 ; SKX:       ## %bb.0:
    189 ; SKX-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[2,3,2,3,0,1,0,1,10,11,10,11,8,9,8,9,18,19,18,19,16,17,16,17,26,27,26,27,24,25,24,25,34,35,34,35,32,33,32,33,42,43,42,43,40,41,40,41,50,51,50,51,48,49,48,49,58,59,58,59,56,57,56,57]
    190 ; SKX-NEXT:    retq
    191   %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 5, i32 5, i32 4, i32 4, i32 9, i32 9, i32 8, i32 8, i32 13, i32 13, i32 12, i32 12, i32 17, i32 17, i32 16, i32 16, i32 21, i32 21, i32 20, i32 20, i32 25, i32 25, i32 24, i32 24, i32 29, i32 29, i32 28, i32 28>
    192   ret <32 x i16> %c
    193 }
    194 
    195 define <32 x i16> @shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<32 x i16> %a) {
        ; Keeps element 0 of %a in lane 0 and zeroes the other 31 lanes
        ; (index 32 selects element 0 of the zeroinitializer second operand).
        ; The KNL expectations AND the low ymm half with a register holding
        ; 0xFFFF in its low word and zero the second half with vpxor; the
        ; SKX expectations use a zero-masked vmovdqu16 with mask value 1.
    196 ; KNL-LABEL: shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
    197 ; KNL:       ## %bb.0:
    198 ; KNL-NEXT:    movl $65535, %eax ## imm = 0xFFFF
    199 ; KNL-NEXT:    vmovd %eax, %xmm1
    200 ; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
    201 ; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    202 ; KNL-NEXT:    retq
    203 ;
    204 ; SKX-LABEL: shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
    205 ; SKX:       ## %bb.0:
    206 ; SKX-NEXT:    movl $1, %eax
    207 ; SKX-NEXT:    kmovd %eax, %k1
    208 ; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
    209 ; SKX-NEXT:    retq
    210   %shuffle = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
    211   ret <32 x i16> %shuffle
    212 }
    213 
    214 define <32 x i16> @insert_dup_mem_v32i16_i32(i32* %ptr) {
        ; Loads an i32 from %ptr, inserts it into element 0 of an all-zero
        ; <4 x i32>, reinterprets that as <8 x i16>, and splats i16 element 0
        ; across a 32-lane result.
        ; The KNL expectations broadcast the word straight from memory
        ; (vpbroadcastw (%rdi)) and copy the result to the second ymm; the
        ; SKX expectations load into eax and broadcast from that register.
    215 ; KNL-LABEL: insert_dup_mem_v32i16_i32:
    216 ; KNL:       ## %bb.0:
    217 ; KNL-NEXT:    vpbroadcastw (%rdi), %ymm0
    218 ; KNL-NEXT:    vmovdqa %ymm0, %ymm1
    219 ; KNL-NEXT:    retq
    220 ;
    221 ; SKX-LABEL: insert_dup_mem_v32i16_i32:
    222 ; SKX:       ## %bb.0:
    223 ; SKX-NEXT:    movl (%rdi), %eax
    224 ; SKX-NEXT:    vpbroadcastw %eax, %zmm0
    225 ; SKX-NEXT:    retq
    226   %tmp = load i32, i32* %ptr, align 4
    227   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
    228   %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
    229   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <32 x i32> zeroinitializer
    230   ret <32 x i16> %tmp3
    231 }
    232 
    233 define <32 x i16> @insert_dup_mem_v32i16_sext_i16(i16* %ptr) {
        ; Loads an i16 from %ptr, sign-extends it to i32, inserts that into
        ; element 0 of an all-zero <4 x i32>, reinterprets the vector as
        ; <8 x i16>, and splats i16 element 0 across a 32-lane result.
        ; Both sets of expectations start with a sign-extending load
        ; (movswl); KNL then goes through xmm0 before a ymm broadcast, while
        ; SKX broadcasts directly from eax into zmm0.
    234 ; KNL-LABEL: insert_dup_mem_v32i16_sext_i16:
    235 ; KNL:       ## %bb.0:
    236 ; KNL-NEXT:    movswl (%rdi), %eax
    237 ; KNL-NEXT:    vmovd %eax, %xmm0
    238 ; KNL-NEXT:    vpbroadcastw %xmm0, %ymm0
    239 ; KNL-NEXT:    vmovdqa %ymm0, %ymm1
    240 ; KNL-NEXT:    retq
    241 ;
    242 ; SKX-LABEL: insert_dup_mem_v32i16_sext_i16:
    243 ; SKX:       ## %bb.0:
    244 ; SKX-NEXT:    movswl (%rdi), %eax
    245 ; SKX-NEXT:    vpbroadcastw %eax, %zmm0
    246 ; SKX-NEXT:    retq
    247   %tmp = load i16, i16* %ptr, align 2
    248   %tmp1 = sext i16 %tmp to i32
    249   %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
    250   %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
    251   %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <32 x i32> zeroinitializer
    252   ret <32 x i16> %tmp4
    253 }
    254 
    255 define <32 x i16> @insert_dup_elt1_mem_v32i16_i32(i32* %ptr) #0 {
        ; Loads an i32 from %ptr, inserts it into element 0 of an all-zero
        ; <4 x i32>, reinterprets that as <8 x i16>, and splats i16 element 1
        ; across a 32-lane result.
        ; Both sets of expectations read the 16-bit word at byte offset 2
        ; from %ptr: KNL broadcasts it from memory, SKX loads it with movzwl
        ; and broadcasts from eax.
        ; NOTE(review): attribute group #0 is referenced here, but its
        ; definition is not visible in this part of the file.
    256 ; KNL-LABEL: insert_dup_elt1_mem_v32i16_i32:
    257 ; KNL:       ## %bb.0:
    258 ; KNL-NEXT:    vpbroadcastw 2(%rdi), %ymm0
    259 ; KNL-NEXT:    vmovdqa %ymm0, %ymm1
    260 ; KNL-NEXT:    retq
    261 ;
    262 ; SKX-LABEL: insert_dup_elt1_mem_v32i16_i32:
    263 ; SKX:       ## %bb.0:
    264 ; SKX-NEXT:    movzwl 2(%rdi), %eax
    265 ; SKX-NEXT:    vpbroadcastw %eax, %zmm0
    266 ; SKX-NEXT:    retq
    267   %tmp = load i32, i32* %ptr, align 4
    268   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
    269   %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
    270   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    271   ret <32 x i16> %tmp3
    272 }
    273 
    274 define <32 x i16> @insert_dup_elt3_mem_v32i16_i32(i32* %ptr) #0 {
        ; Loads an i32 from %ptr, inserts it into element 1 of an all-zero
        ; <4 x i32>, reinterprets that as <8 x i16>, and splats i16 element 3
        ; across a 32-lane result. i16 elements 2 and 3 of the bitcast vector
        ; overlay the inserted i32.
        ; Both sets of expectations read the 16-bit word at byte offset 2
        ; from %ptr: KNL broadcasts it from memory, SKX loads it with movzwl
        ; and broadcasts from eax.
        ; NOTE(review): attribute group #0 is referenced here, but its
        ; definition is not visible in this part of the file.
    275 ; KNL-LABEL: insert_dup_elt3_mem_v32i16_i32:
    276 ; KNL:       ## %bb.0:
    277 ; KNL-NEXT:    vpbroadcastw 2(%rdi), %ymm0
    278 ; KNL-NEXT:    vmovdqa %ymm0, %ymm1
    279 ; KNL-NEXT:    retq
    280 ;
    281 ; SKX-LABEL: insert_dup_elt3_mem_v32i16_i32:
    282 ; SKX:       ## %bb.0:
    283 ; SKX-NEXT:    movzwl 2(%rdi), %eax
    284 ; SKX-NEXT:    vpbroadcastw %eax, %zmm0
    285 ; SKX-NEXT:    retq
    286   %tmp = load i32, i32* %ptr, align 4
    287   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
    288   %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
    289   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
    290   ret <32 x i16> %tmp3
    291 }
    292 
    293 define <32 x i16> @shuffle_v32i16_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz(<32 x i16> %a) {
        ; Here the FIRST shuffle operand is zeroinitializer, so index 0
        ; yields zero and indices 32-39 select elements 0-7 of %a.
        ; Each of those eight elements is followed by three zero lanes,
        ; which is a zero-extension of eight i16 values to 64 bits each.
        ; The expectations are vpmovzxwq: two ymm-sized conversions (with a
        ; vpshufd to reach elements 4-7) for KNL, one zmm conversion for SKX.
    294 ; KNL-LABEL: shuffle_v32i16_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
    295 ; KNL:       ## %bb.0:
    296 ; KNL-NEXT:    vpmovzxwq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    297 ; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
    298 ; KNL-NEXT:    vpmovzxwq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    299 ; KNL-NEXT:    vmovdqa %ymm2, %ymm0
    300 ; KNL-NEXT:    retq
    301 ;
    302 ; SKX-LABEL: shuffle_v32i16_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
    303 ; SKX:       ## %bb.0:
    304 ; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
    305 ; SKX-NEXT:    retq
    306   %shuffle = shufflevector <32 x i16> zeroinitializer, <32 x i16> %a, <32 x i32> <i32 32, i32 0, i32 0, i32 0, i32 33, i32 0, i32 0, i32 0, i32 34, i32 0, i32 0, i32 0, i32 35, i32 0, i32 0, i32 0, i32 36, i32 0, i32 0, i32 0, i32 37, i32 0, i32 0, i32 0, i32 38, i32 0, i32 0, i32 0, i32 39, i32 0, i32 0, i32 0>
    307   ret <32 x i16> %shuffle
    308 }
    309 
    310 define <32 x i16> @shuffle_v32i16_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz(<32 x i16> %a) {
        ; Here the FIRST shuffle operand is zeroinitializer, so index 0
        ; yields zero and indices 32-47 select elements 0-15 of %a.
        ; Each of those sixteen elements is followed by one zero lane, which
        ; is a zero-extension of sixteen i16 values to 32 bits each.
        ; The expectations are vpmovzxwd: two ymm-sized conversions (with a
        ; vextracti128 to reach elements 8-15) for KNL, one zmm conversion
        ; for SKX.
    311 ; KNL-LABEL: shuffle_v32i16_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz:
    312 ; KNL:       ## %bb.0:
    313 ; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    314 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
    315 ; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    316 ; KNL-NEXT:    vmovdqa %ymm2, %ymm0
    317 ; KNL-NEXT:    retq
    318 ;
    319 ; SKX-LABEL: shuffle_v32i16_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz:
    320 ; SKX:       ## %bb.0:
    321 ; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
    322 ; SKX-NEXT:    retq
    323   %shuffle = shufflevector <32 x i16> zeroinitializer, <32 x i16> %a, <32 x i32> <i32 32, i32 0, i32 33, i32 0, i32 34, i32 0, i32 35, i32 0, i32 36, i32 0, i32 37, i32 0, i32 38, i32 0, i32 39, i32 0, i32 40, i32 0, i32 41, i32 0, i32 42, i32 0, i32 43, i32 0, i32 44, i32 0, i32 45, i32 0, i32 46, i32 0, i32 47, i32 0>
    324   ret <32 x i16> %shuffle
    325 }
    326 
    327 define <8 x i16> @pr32967(<32 x i16> %v) {
        ; Narrowing shuffle: picks every fourth element of %v starting at
        ; index 1 (1, 5, 9, ..., 29) and returns them as an <8 x i16>.
        ; The KNL expectations gather the elements from each 128-bit quarter
        ; with vpshufd/vpshuflw, merge pairs with vpunpckldq, and combine the
        ; two halves with vpblendd. The SKX expectations split zmm0 into two
        ; ymm halves and use one vpermi2w with a constant index vector,
        ; followed by vzeroupper before returning.
        ; NOTE(review): the name suggests a regression test for bug report
        ; 32967 - presumably so; the report itself is not referenced here.
    328 ; KNL-LABEL: pr32967:
    329 ; KNL:       ## %bb.0:
    330 ; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
    331 ; KNL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
    332 ; KNL-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,1,1,3,4,5,6,7]
    333 ; KNL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
    334 ; KNL-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,1,1,3,4,5,6,7]
    335 ; KNL-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
    336 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm2
    337 ; KNL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
    338 ; KNL-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[1,3,2,3,4,5,6,7]
    339 ; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    340 ; KNL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7]
    341 ; KNL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    342 ; KNL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    343 ; KNL-NEXT:    retq
    344 ;
    345 ; SKX-LABEL: pr32967:
    346 ; SKX:       ## %bb.0:
    347 ; SKX-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
    348 ; SKX-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,u,u,u,u,u,u,u,u>
    349 ; SKX-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1
    350 ; SKX-NEXT:    vmovdqa %xmm1, %xmm0
    351 ; SKX-NEXT:    vzeroupper
    352 ; SKX-NEXT:    retq
    353  %shuffle = shufflevector <32 x i16> %v, <32 x i16> undef, <8 x i32> <i32 1,i32 5,i32 9,i32 13,i32 17,i32 21,i32 25,i32 29>
    354  ret <8 x i16> %shuffle
    355 }
    356 
    357 define <32 x i16> @shuffle_v32i16_07_zz_05_zz_03_zz_01_zz_15_zz_13_zz_11_zz_09_zz_23_zz_21_zz_19_zz_17_zz_31_zz_29_zz_27_zz_25_zz(<32 x i16> %a) {
        ; Here the FIRST shuffle operand is zeroinitializer, so index 0
        ; yields zero and an index N >= 32 selects element N-32 of %a.
        ; Even result lanes take odd elements of %a in descending order
        ; within each group of eight (7,5,3,1, then 15,13,11,9, ...); odd
        ; result lanes are zero.
        ; The expectations are byte shuffles with zeroing (vpshufb): one per
        ; ymm half for KNL, one on zmm for SKX.
        ; NOTE(review): the function name ends in 25, but the last non-zero
        ; mask index is 58, i.e. element 26 of %a. The expected byte indices
        ; (ymm1[20,21] and zmm0[52,53]) agree with the mask, not the name.
    358 ; KNL-LABEL: shuffle_v32i16_07_zz_05_zz_03_zz_01_zz_15_zz_13_zz_11_zz_09_zz_23_zz_21_zz_19_zz_17_zz_31_zz_29_zz_27_zz_25_zz:
    359 ; KNL:       ## %bb.0:
    360 ; KNL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15],zero,zero,ymm0[10,11],zero,zero,ymm0[6,7],zero,zero,ymm0[2,3],zero,zero,ymm0[30,31],zero,zero,ymm0[26,27],zero,zero,ymm0[22,23],zero,zero,ymm0[18,19],zero,zero
    361 ; KNL-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[14,15],zero,zero,ymm1[10,11],zero,zero,ymm1[6,7],zero,zero,ymm1[2,3],zero,zero,ymm1[30,31],zero,zero,ymm1[26,27],zero,zero,ymm1[22,23],zero,zero,ymm1[20,21],zero,zero
    362 ; KNL-NEXT:    retq
    363 ;
    364 ; SKX-LABEL: shuffle_v32i16_07_zz_05_zz_03_zz_01_zz_15_zz_13_zz_11_zz_09_zz_23_zz_21_zz_19_zz_17_zz_31_zz_29_zz_27_zz_25_zz:
    365 ; SKX:       ## %bb.0:
    366 ; SKX-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[14,15],zero,zero,zmm0[10,11],zero,zero,zmm0[6,7],zero,zero,zmm0[2,3],zero,zero,zmm0[30,31],zero,zero,zmm0[26,27],zero,zero,zmm0[22,23],zero,zero,zmm0[18,19],zero,zero,zmm0[46,47],zero,zero,zmm0[42,43],zero,zero,zmm0[38,39],zero,zero,zmm0[34,35],zero,zero,zmm0[62,63],zero,zero,zmm0[58,59],zero,zero,zmm0[54,55],zero,zero,zmm0[52,53],zero,zero
    367 ; SKX-NEXT:    retq
    368   %shuffle = shufflevector <32 x i16> zeroinitializer, <32 x i16> %a, <32 x i32> <i32 39, i32 0, i32 37, i32 0, i32 35, i32 0, i32 33, i32 0, i32 47, i32 0, i32 45, i32 0, i32 43, i32 0, i32 41, i32 0, i32 55, i32 0, i32 53, i32 0, i32 51, i32 0, i32 49, i32 0, i32 63, i32 0, i32 61, i32 0, i32 59, i32 0, i32 58, i32 0>
    369   ret <32 x i16> %shuffle
    370 }
    371