Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=X32
      3 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=X64
      4 
      5 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512bw-builtins.c
      6 
; Splat byte 0 of %a0 across all 64 i8 lanes (zeroinitializer shuffle mask);
; should select a single unmasked vpbroadcastb on both X32 and X64.
      7 define <8 x i64> @test_mm512_broadcastb_epi8(<2 x i64> %a0) {
      8 ; X32-LABEL: test_mm512_broadcastb_epi8:
      9 ; X32:       # BB#0:
     10 ; X32-NEXT:    vpbroadcastb %xmm0, %zmm0
     11 ; X32-NEXT:    retl
     12 ;
     13 ; X64-LABEL: test_mm512_broadcastb_epi8:
     14 ; X64:       # BB#0:
     15 ; X64-NEXT:    vpbroadcastb %xmm0, %zmm0
     16 ; X64-NEXT:    retq
     17   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
     18   %res0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <64 x i32> zeroinitializer
     19   %res1 = bitcast <64 x i8> %res0 to <8 x i64>
     20   ret <8 x i64> %res1
     21 }
     22 
; Merge-masked byte splat: the <64 x i1> mask is loaded through i64* %a1
; reinterpreted as <64 x i1>*, and the select keeps the passthru %a0 lanes
; where the mask bit is 0; expects the load to fold into kmovq + vpbroadcastb {%k1}.
     23 define <8 x i64> @test_mm512_mask_broadcastb_epi8(<8 x i64> %a0, i64* %a1, <2 x i64> %a2) {
     24 ; X32-LABEL: test_mm512_mask_broadcastb_epi8:
     25 ; X32:       # BB#0:
     26 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     27 ; X32-NEXT:    kmovq (%eax), %k1
     28 ; X32-NEXT:    vpbroadcastb %xmm1, %zmm0 {%k1}
     29 ; X32-NEXT:    retl
     30 ;
     31 ; X64-LABEL: test_mm512_mask_broadcastb_epi8:
     32 ; X64:       # BB#0:
     33 ; X64-NEXT:    kmovq (%rdi), %k1
     34 ; X64-NEXT:    vpbroadcastb %xmm1, %zmm0 {%k1}
     35 ; X64-NEXT:    retq
     36   %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
     37   %bc1 = bitcast i64* %a1 to <64 x i1>*
     38   %arg1 = load <64 x i1>, <64 x i1>* %bc1
     39   %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
     40   %res0 = shufflevector <16 x i8> %arg2, <16 x i8> undef, <64 x i32> zeroinitializer
     41   %res1 = select <64 x i1> %arg1, <64 x i8> %res0, <64 x i8> %arg0
     42   %res2 = bitcast <64 x i8> %res1 to <8 x i64>
     43   ret <8 x i64> %res2
     44 }
     45 
; Zero-masked byte splat: same broadcast as above, but masked-off lanes are
; zeroed (select against zeroinitializer); expects vpbroadcastb {%k1} {z}.
     46 define <8 x i64> @test_mm512_maskz_broadcastb_epi8(i64* %a0, <2 x i64> %a1) {
     47 ; X32-LABEL: test_mm512_maskz_broadcastb_epi8:
     48 ; X32:       # BB#0:
     49 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     50 ; X32-NEXT:    kmovq (%eax), %k1
     51 ; X32-NEXT:    vpbroadcastb %xmm0, %zmm0 {%k1} {z}
     52 ; X32-NEXT:    retl
     53 ;
     54 ; X64-LABEL: test_mm512_maskz_broadcastb_epi8:
     55 ; X64:       # BB#0:
     56 ; X64-NEXT:    kmovq (%rdi), %k1
     57 ; X64-NEXT:    vpbroadcastb %xmm0, %zmm0 {%k1} {z}
     58 ; X64-NEXT:    retq
     59   %bc0 = bitcast i64* %a0 to <64 x i1>*
     60   %arg0 = load <64 x i1>, <64 x i1>* %bc0
     61   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
     62   %res0 = shufflevector <16 x i8> %arg1, <16 x i8> undef, <64 x i32> zeroinitializer
     63   %res1 = select <64 x i1> %arg0, <64 x i8> %res0, <64 x i8> zeroinitializer
     64   %res2 = bitcast <64 x i8> %res1 to <8 x i64>
     65   ret <8 x i64> %res2
     66 }
     67 
; Splat word 0 of %a0 across all 32 i16 lanes; should select a single
; unmasked vpbroadcastw on both targets.
     68 define <8 x i64> @test_mm512_broadcastw_epi16(<2 x i64> %a0) {
     69 ; X32-LABEL: test_mm512_broadcastw_epi16:
     70 ; X32:       # BB#0:
     71 ; X32-NEXT:    vpbroadcastw %xmm0, %zmm0
     72 ; X32-NEXT:    retl
     73 ;
     74 ; X64-LABEL: test_mm512_broadcastw_epi16:
     75 ; X64:       # BB#0:
     76 ; X64-NEXT:    vpbroadcastw %xmm0, %zmm0
     77 ; X64-NEXT:    retq
     78   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
     79   %res0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <32 x i32> zeroinitializer
     80   %res1 = bitcast <32 x i16> %res0 to <8 x i64>
     81   ret <8 x i64> %res1
     82 }
     83 
; Merge-masked word splat: unlike the epi8 tests, the 32-bit mask %a1 is
; bitcast directly from i32 to <32 x i1> (no memory round-trip), so only a
; kmovd of the scalar is expected before the masked vpbroadcastw.
     84 define <8 x i64> @test_mm512_mask_broadcastw_epi16(<8 x i64> %a0, i32 %a1, <2 x i64> %a2) {
     85 ; X32-LABEL: test_mm512_mask_broadcastw_epi16:
     86 ; X32:       # BB#0:
     87 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     88 ; X32-NEXT:    kmovd %eax, %k1
     89 ; X32-NEXT:    vpbroadcastw %xmm1, %zmm0 {%k1}
     90 ; X32-NEXT:    retl
     91 ;
     92 ; X64-LABEL: test_mm512_mask_broadcastw_epi16:
     93 ; X64:       # BB#0:
     94 ; X64-NEXT:    kmovd %edi, %k1
     95 ; X64-NEXT:    vpbroadcastw %xmm1, %zmm0 {%k1}
     96 ; X64-NEXT:    retq
     97   %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
     98   %arg1 = bitcast i32 %a1 to <32 x i1>
     99   %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
    100   %res0 = shufflevector <8 x i16> %arg2, <8 x i16> undef, <32 x i32> zeroinitializer
    101   %res1 = select <32 x i1> %arg1, <32 x i16> %res0, <32 x i16> %arg0
    102   %res2 = bitcast <32 x i16> %res1 to <8 x i64>
    103   ret <8 x i64> %res2
    104 }
    105 
; Zero-masked word splat: masked-off lanes are zeroed (select against
; zeroinitializer); expects vpbroadcastw {%k1} {z}.
    106 define <8 x i64> @test_mm512_maskz_broadcastw_epi16(i32 %a0, <2 x i64> %a1) {
    107 ; X32-LABEL: test_mm512_maskz_broadcastw_epi16:
    108 ; X32:       # BB#0:
    109 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    110 ; X32-NEXT:    kmovd %eax, %k1
    111 ; X32-NEXT:    vpbroadcastw %xmm0, %zmm0 {%k1} {z}
    112 ; X32-NEXT:    retl
    113 ;
    114 ; X64-LABEL: test_mm512_maskz_broadcastw_epi16:
    115 ; X64:       # BB#0:
    116 ; X64-NEXT:    kmovd %edi, %k1
    117 ; X64-NEXT:    vpbroadcastw %xmm0, %zmm0 {%k1} {z}
    118 ; X64-NEXT:    retq
    119   %arg0 = bitcast i32 %a0 to <32 x i1>
    120   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
    121   %res0 = shufflevector <8 x i16> %arg1, <8 x i16> undef, <32 x i32> zeroinitializer
    122   %res1 = select <32 x i1> %arg0, <32 x i16> %res0, <32 x i16> zeroinitializer
    123   %res2 = bitcast <32 x i16> %res1 to <8 x i64>
    124   ret <8 x i64> %res2
    125 }
    126 
; NOTE(review): despite the "bslli" (byte shift LEFT) name, the shuffle mask
; below takes bytes 11..15 of each 128-bit lane followed by zeros from the
; second operand — i.e. a byte shift RIGHT by 11 per lane — and the CHECK
; lines accordingly expect vpsrldq, same opcode as test_mm512_bsrli_epi128.
; TODO: confirm against the IR clang emits for _mm512_bslli_epi128
; (a left shift should produce a vpslldq pattern).
    127 define <8 x i64> @test_mm512_bslli_epi128(<8 x i64> %a0) {
    128 ; X32-LABEL: test_mm512_bslli_epi128:
    129 ; X32:       # BB#0:
    130 ; X32-NEXT:    vpsrldq {{.*#+}} zmm0 = zmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    131 ; X32-NEXT:    retl
    132 ;
    133 ; X64-LABEL: test_mm512_bslli_epi128:
    134 ; X64:       # BB#0:
    135 ; X64-NEXT:    vpsrldq {{.*#+}} zmm0 = zmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    136 ; X64-NEXT:    retq
    137   %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
    138   %res0 = shufflevector <64 x i8> %arg0, <64 x i8> zeroinitializer, <64 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122>
    139   %res1 = bitcast <64 x i8> %res0 to <8 x i64>
    140   ret <8 x i64> %res1
    141 }
    142 
; Per-128-bit-lane byte shift right by 5: each 16-byte lane keeps its bytes
; 5..15 and fills the top 5 bytes with zeros from the zeroinitializer operand;
; expected to lower to a single vpsrldq.
    143 define <8 x i64> @test_mm512_bsrli_epi128(<8 x i64> %a0) {
    144 ; X32-LABEL: test_mm512_bsrli_epi128:
    145 ; X32:       # BB#0:
    146 ; X32-NEXT:    vpsrldq {{.*#+}} zmm0 = zmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zmm0[21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zmm0[37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zmm0[53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero
    147 ; X32-NEXT:    retl
    148 ;
    149 ; X64-LABEL: test_mm512_bsrli_epi128:
    150 ; X64:       # BB#0:
    151 ; X64-NEXT:    vpsrldq {{.*#+}} zmm0 = zmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zmm0[21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zmm0[37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zmm0[53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero
    152 ; X64-NEXT:    retq
    153   %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
    154   %res0 = shufflevector <64 x i8> %arg0, <64 x i8> zeroinitializer, <64 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 100, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116>
    155   %res1 = bitcast <64 x i8> %res0 to <8 x i64>
    156   ret <8 x i64> %res1
    157 }
    158 
; Interleave the high 8 bytes of each 128-bit lane of %a0 and %a1
; (shuffle indices 8..15/72..79 etc. per lane); expects a single vpunpckhbw.
    159 define <8 x i64> @test_mm512_unpackhi_epi8(<8 x i64> %a0, <8 x i64> %a1) {
    160 ; X32-LABEL: test_mm512_unpackhi_epi8:
    161 ; X32:       # BB#0:
    162 ; X32-NEXT:    vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
    163 ; X32-NEXT:    retl
    164 ;
    165 ; X64-LABEL: test_mm512_unpackhi_epi8:
    166 ; X64:       # BB#0:
    167 ; X64-NEXT:    vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
    168 ; X64-NEXT:    retq
    169   %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
    170   %arg1 = bitcast <8 x i64> %a1 to <64 x i8>
    171   %res0 = shufflevector <64 x i8> %arg0, <64 x i8> %arg1, <64 x i32> <i32 8, i32 72, i32 9, i32 73, i32 10, i32 74, i32 11, i32 75, i32 12, i32 76, i32 13, i32 77, i32 14, i32 78, i32 15, i32 79, i32 24, i32 88, i32 25, i32 89, i32 26, i32 90, i32 27, i32 91, i32 28, i32 92, i32 29, i32 93, i32 30, i32 94, i32 31, i32 95, i32 40, i32 104, i32 41, i32 105, i32 42, i32 106, i32 43, i32 107, i32 44, i32 108, i32 45, i32 109, i32 46, i32 110, i32 47, i32 111, i32 56, i32 120, i32 57, i32 121, i32 58, i32 122, i32 59, i32 123, i32 60, i32 124, i32 61, i32 125, i32 62, i32 126, i32 63, i32 127>
    172   %res1 = bitcast <64 x i8> %res0 to <8 x i64>
    173   ret <8 x i64> %res1
    174 }
    175 
    176 ; TODO - improve support for i64 -> mmask64 on 32-bit targets
; Merge-masked high-byte interleave of %a2/%a3: the <64 x i1> mask is loaded
; via i64* %a1 bitcast to <64 x i1>*, and masked-off lanes keep the passthru
; %a0; expects kmovq of the memory operand + vpunpckhbw {%k1}.
    177 define <8 x i64> @test_mm512_mask_unpackhi_epi8(<8 x i64> %a0, i64* %a1, <8 x i64> %a2, <8 x i64> %a3) {
    178 ; X32-LABEL: test_mm512_mask_unpackhi_epi8:
    179 ; X32:       # BB#0:
    180 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    181 ; X32-NEXT:    kmovq (%eax), %k1
    182 ; X32-NEXT:    vpunpckhbw {{.*#+}} zmm0 {%k1} = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
    183 ; X32-NEXT:    retl
    184 ;
    185 ; X64-LABEL: test_mm512_mask_unpackhi_epi8:
    186 ; X64:       # BB#0:
    187 ; X64-NEXT:    kmovq (%rdi), %k1
    188 ; X64-NEXT:    vpunpckhbw {{.*#+}} zmm0 {%k1} = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
    189 ; X64-NEXT:    retq
    190   %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
    191   %arg1 = bitcast i64* %a1 to <64 x i1>*
    192   %sel1 = load <64 x i1>, <64 x i1>* %arg1
    193   %arg2 = bitcast <8 x i64> %a2 to <64 x i8>
    194   %arg3 = bitcast <8 x i64> %a3 to <64 x i8>
    195   %res0 = shufflevector <64 x i8> %arg2, <64 x i8> %arg3, <64 x i32> <i32 8, i32 72, i32 9, i32 73, i32 10, i32 74, i32 11, i32 75, i32 12, i32 76, i32 13, i32 77, i32 14, i32 78, i32 15, i32 79, i32 24, i32 88, i32 25, i32 89, i32 26, i32 90, i32 27, i32 91, i32 28, i32 92, i32 29, i32 93, i32 30, i32 94, i32 31, i32 95, i32 40, i32 104, i32 41, i32 105, i32 42, i32 106, i32 43, i32 107, i32 44, i32 108, i32 45, i32 109, i32 46, i32 110, i32 47, i32 111, i32 56, i32 120, i32 57, i32 121, i32 58, i32 122, i32 59, i32 123, i32 60, i32 124, i32 61, i32 125, i32 62, i32 126, i32 63, i32 127>
    196   %res1 = select <64 x i1> %sel1, <64 x i8> %res0, <64 x i8> %arg0
    197   %res2 = bitcast <64 x i8> %res1 to <8 x i64>
    198   ret <8 x i64> %res2
    199 }
    200 
; Zero-masked high-byte interleave: masked-off lanes are zeroed (select
; against zeroinitializer); expects vpunpckhbw {%k1} {z}.
    201 define <8 x i64> @test_mm512_maskz_unpackhi_epi8(i64* %a0, <8 x i64> %a1, <8 x i64> %a2) {
    202 ; X32-LABEL: test_mm512_maskz_unpackhi_epi8:
    203 ; X32:       # BB#0:
    204 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    205 ; X32-NEXT:    kmovq (%eax), %k1
    206 ; X32-NEXT:    vpunpckhbw {{.*#+}} zmm0 {%k1} {z} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
    207 ; X32-NEXT:    retl
    208 ;
    209 ; X64-LABEL: test_mm512_maskz_unpackhi_epi8:
    210 ; X64:       # BB#0:
    211 ; X64-NEXT:    kmovq (%rdi), %k1
    212 ; X64-NEXT:    vpunpckhbw {{.*#+}} zmm0 {%k1} {z} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
    213 ; X64-NEXT:    retq
    214   %arg0 = bitcast i64* %a0 to <64 x i1>*
    215   %sel0 = load <64 x i1>, <64 x i1>* %arg0
    216   %arg1 = bitcast <8 x i64> %a1 to <64 x i8>
    217   %arg2 = bitcast <8 x i64> %a2 to <64 x i8>
    218   %res0 = shufflevector <64 x i8> %arg1, <64 x i8> %arg2, <64 x i32> <i32 8, i32 72, i32 9, i32 73, i32 10, i32 74, i32 11, i32 75, i32 12, i32 76, i32 13, i32 77, i32 14, i32 78, i32 15, i32 79, i32 24, i32 88, i32 25, i32 89, i32 26, i32 90, i32 27, i32 91, i32 28, i32 92, i32 29, i32 93, i32 30, i32 94, i32 31, i32 95, i32 40, i32 104, i32 41, i32 105, i32 42, i32 106, i32 43, i32 107, i32 44, i32 108, i32 45, i32 109, i32 46, i32 110, i32 47, i32 111, i32 56, i32 120, i32 57, i32 121, i32 58, i32 122, i32 59, i32 123, i32 60, i32 124, i32 61, i32 125, i32 62, i32 126, i32 63, i32 127>
    219   %res1 = select <64 x i1> %sel0, <64 x i8> %res0, <64 x i8> zeroinitializer
    220   %res2 = bitcast <64 x i8> %res1 to <8 x i64>
    221   ret <8 x i64> %res2
    222 }
    223 
; Interleave the high 4 words of each 128-bit lane of %a0 and %a1;
; expects a single vpunpckhwd.
    224 define <8 x i64> @test_mm512_unpackhi_epi16(<8 x i64> %a0, <8 x i64> %a1) {
    225 ; X32-LABEL: test_mm512_unpackhi_epi16:
    226 ; X32:       # BB#0:
    227 ; X32-NEXT:    vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
    228 ; X32-NEXT:    retl
    229 ;
    230 ; X64-LABEL: test_mm512_unpackhi_epi16:
    231 ; X64:       # BB#0:
    232 ; X64-NEXT:    vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
    233 ; X64-NEXT:    retq
    234   %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
    235   %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
    236   %res0 = shufflevector <32 x i16> %arg0, <32 x i16> %arg1, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
    237   %res1 = bitcast <32 x i16> %res0 to <8 x i64>
    238   ret <8 x i64> %res1
    239 }
    240 
; Merge-masked high-word interleave of %a2/%a3: i32 %a1 is bitcast to
; <32 x i1> (kmovd of the scalar); masked-off lanes keep passthru %a0.
    241 define <8 x i64> @test_mm512_mask_unpackhi_epi16(<8 x i64> %a0, i32 %a1, <8 x i64> %a2, <8 x i64> %a3) {
    242 ; X32-LABEL: test_mm512_mask_unpackhi_epi16:
    243 ; X32:       # BB#0:
    244 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    245 ; X32-NEXT:    kmovd %eax, %k1
    246 ; X32-NEXT:    vpunpckhwd {{.*#+}} zmm0 {%k1} = zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31]
    247 ; X32-NEXT:    retl
    248 ;
    249 ; X64-LABEL: test_mm512_mask_unpackhi_epi16:
    250 ; X64:       # BB#0:
    251 ; X64-NEXT:    kmovd %edi, %k1
    252 ; X64-NEXT:    vpunpckhwd {{.*#+}} zmm0 {%k1} = zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31]
    253 ; X64-NEXT:    retq
    254   %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
    255   %arg1 = bitcast i32 %a1 to <32 x i1>
    256   %arg2 = bitcast <8 x i64> %a2 to <32 x i16>
    257   %arg3 = bitcast <8 x i64> %a3 to <32 x i16>
    258   %res0 = shufflevector <32 x i16> %arg2, <32 x i16> %arg3, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
    259   %res1 = select <32 x i1> %arg1, <32 x i16> %res0, <32 x i16> %arg0
    260   %res2 = bitcast <32 x i16> %res1 to <8 x i64>
    261   ret <8 x i64> %res2
    262 }
    263 
; Zero-masked high-word interleave: masked-off lanes are zeroed;
; expects vpunpckhwd {%k1} {z}.
    264 define <8 x i64> @test_mm512_maskz_unpackhi_epi16(i32 %a0, <8 x i64> %a1, <8 x i64> %a2) {
    265 ; X32-LABEL: test_mm512_maskz_unpackhi_epi16:
    266 ; X32:       # BB#0:
    267 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    268 ; X32-NEXT:    kmovd %eax, %k1
    269 ; X32-NEXT:    vpunpckhwd {{.*#+}} zmm0 {%k1} {z} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
    270 ; X32-NEXT:    retl
    271 ;
    272 ; X64-LABEL: test_mm512_maskz_unpackhi_epi16:
    273 ; X64:       # BB#0:
    274 ; X64-NEXT:    kmovd %edi, %k1
    275 ; X64-NEXT:    vpunpckhwd {{.*#+}} zmm0 {%k1} {z} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
    276 ; X64-NEXT:    retq
    277   %arg0 = bitcast i32 %a0 to <32 x i1>
    278   %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
    279   %arg2 = bitcast <8 x i64> %a2 to <32 x i16>
    280   %res0 = shufflevector <32 x i16> %arg1, <32 x i16> %arg2, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
    281   %res1 = select <32 x i1> %arg0, <32 x i16> %res0, <32 x i16> zeroinitializer
    282   %res2 = bitcast <32 x i16> %res1 to <8 x i64>
    283   ret <8 x i64> %res2
    284 }
    285 
; Interleave the low 8 bytes of each 128-bit lane of %a0 and %a1;
; expects a single vpunpcklbw.
    286 define <8 x i64> @test_mm512_unpacklo_epi8(<8 x i64> %a0, <8 x i64> %a1) {
    287 ; X32-LABEL: test_mm512_unpacklo_epi8:
    288 ; X32:       # BB#0:
    289 ; X32-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
    290 ; X32-NEXT:    retl
    291 ;
    292 ; X64-LABEL: test_mm512_unpacklo_epi8:
    293 ; X64:       # BB#0:
    294 ; X64-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
    295 ; X64-NEXT:    retq
    296   %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
    297   %arg1 = bitcast <8 x i64> %a1 to <64 x i8>
    298   %res0 = shufflevector <64 x i8> %arg0, <64 x i8> %arg1, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
    299   %res1 = bitcast <64 x i8> %res0 to <8 x i64>
    300   ret <8 x i64> %res1
    301 }
    302 
; Merge-masked low-byte interleave of %a2/%a3: the <64 x i1> mask is loaded
; via i64* %a1 bitcast to <64 x i1>*; masked-off lanes keep passthru %a0.
; Expects kmovq + vpunpcklbw {%k1}.
    303 define <8 x i64> @test_mm512_mask_unpacklo_epi8(<8 x i64> %a0, i64* %a1, <8 x i64> %a2, <8 x i64> %a3) {
    304 ; X32-LABEL: test_mm512_mask_unpacklo_epi8:
    305 ; X32:       # BB#0:
    306 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    307 ; X32-NEXT:    kmovq (%eax), %k1
    308 ; X32-NEXT:    vpunpcklbw {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55]
    309 ; X32-NEXT:    retl
    310 ;
    311 ; X64-LABEL: test_mm512_mask_unpacklo_epi8:
    312 ; X64:       # BB#0:
    313 ; X64-NEXT:    kmovq (%rdi), %k1
    314 ; X64-NEXT:    vpunpcklbw {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55]
    315 ; X64-NEXT:    retq
    316   %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
    317   %arg1 = bitcast i64* %a1 to <64 x i1>*
    318   %sel1 = load <64 x i1>, <64 x i1>* %arg1
    319   %arg2 = bitcast <8 x i64> %a2 to <64 x i8>
    320   %arg3 = bitcast <8 x i64> %a3 to <64 x i8>
    321   %res0 = shufflevector <64 x i8> %arg2, <64 x i8> %arg3, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
    322   %res1 = select <64 x i1> %sel1, <64 x i8> %res0, <64 x i8> %arg0
    323   %res2 = bitcast <64 x i8> %res1 to <8 x i64>
    324   ret <8 x i64> %res2
    325 }
    326 
; Zero-masked low-byte interleave: masked-off lanes are zeroed;
; expects vpunpcklbw {%k1} {z}.
    327 define <8 x i64> @test_mm512_maskz_unpacklo_epi8(i64* %a0, <8 x i64> %a1, <8 x i64> %a2) {
    328 ; X32-LABEL: test_mm512_maskz_unpacklo_epi8:
    329 ; X32:       # BB#0:
    330 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    331 ; X32-NEXT:    kmovq (%eax), %k1
    332 ; X32-NEXT:    vpunpcklbw {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
    333 ; X32-NEXT:    retl
    334 ;
    335 ; X64-LABEL: test_mm512_maskz_unpacklo_epi8:
    336 ; X64:       # BB#0:
    337 ; X64-NEXT:    kmovq (%rdi), %k1
    338 ; X64-NEXT:    vpunpcklbw {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
    339 ; X64-NEXT:    retq
    340   %arg0 = bitcast i64* %a0 to <64 x i1>*
    341   %sel0 = load <64 x i1>, <64 x i1>* %arg0
    342   %arg1 = bitcast <8 x i64> %a1 to <64 x i8>
    343   %arg2 = bitcast <8 x i64> %a2 to <64 x i8>
    344   %res0 = shufflevector <64 x i8> %arg1, <64 x i8> %arg2, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
    345   %res1 = select <64 x i1> %sel0, <64 x i8> %res0, <64 x i8> zeroinitializer
    346   %res2 = bitcast <64 x i8> %res1 to <8 x i64>
    347   ret <8 x i64> %res2
    348 }
    349 
; Interleave the low 4 words of each 128-bit lane of %a0 and %a1;
; expects a single vpunpcklwd.
    350 define <8 x i64> @test_mm512_unpacklo_epi16(<8 x i64> %a0, <8 x i64> %a1) {
    351 ; X32-LABEL: test_mm512_unpacklo_epi16:
    352 ; X32:       # BB#0:
    353 ; X32-NEXT:    vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
    354 ; X32-NEXT:    retl
    355 ;
    356 ; X64-LABEL: test_mm512_unpacklo_epi16:
    357 ; X64:       # BB#0:
    358 ; X64-NEXT:    vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
    359 ; X64-NEXT:    retq
    360   %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
    361   %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
    362   %res0 = shufflevector <32 x i16> %arg0, <32 x i16> %arg1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59>
    363   %res1 = bitcast <32 x i16> %res0 to <8 x i64>
    364   ret <8 x i64> %res1
    365 }
    366 
; Merge-masked low-word interleave of %a2/%a3: i32 %a1 is bitcast to
; <32 x i1> (kmovd of the scalar); masked-off lanes keep passthru %a0.
    367 define <8 x i64> @test_mm512_mask_unpacklo_epi16(<8 x i64> %a0, i32 %a1, <8 x i64> %a2, <8 x i64> %a3) {
    368 ; X32-LABEL: test_mm512_mask_unpacklo_epi16:
    369 ; X32:       # BB#0:
    370 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    371 ; X32-NEXT:    kmovd %eax, %k1
    372 ; X32-NEXT:    vpunpcklwd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27]
    373 ; X32-NEXT:    retl
    374 ;
    375 ; X64-LABEL: test_mm512_mask_unpacklo_epi16:
    376 ; X64:       # BB#0:
    377 ; X64-NEXT:    kmovd %edi, %k1
    378 ; X64-NEXT:    vpunpcklwd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27]
    379 ; X64-NEXT:    retq
    380   %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
    381   %arg1 = bitcast i32 %a1 to <32 x i1>
    382   %arg2 = bitcast <8 x i64> %a2 to <32 x i16>
    383   %arg3 = bitcast <8 x i64> %a3 to <32 x i16>
    384   %res0 = shufflevector <32 x i16> %arg2, <32 x i16> %arg3, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59>
    385   %res1 = select <32 x i1> %arg1, <32 x i16> %res0, <32 x i16> %arg0
    386   %res2 = bitcast <32 x i16> %res1 to <8 x i64>
    387   ret <8 x i64> %res2
    388 }
    389 
; Zero-masked low-word interleave: masked-off lanes are zeroed;
; expects vpunpcklwd {%k1} {z}.
    390 define <8 x i64> @test_mm512_maskz_unpacklo_epi16(i32 %a0, <8 x i64> %a1, <8 x i64> %a2) {
    391 ; X32-LABEL: test_mm512_maskz_unpacklo_epi16:
    392 ; X32:       # BB#0:
    393 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    394 ; X32-NEXT:    kmovd %eax, %k1
    395 ; X32-NEXT:    vpunpcklwd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
    396 ; X32-NEXT:    retl
    397 ;
    398 ; X64-LABEL: test_mm512_maskz_unpacklo_epi16:
    399 ; X64:       # BB#0:
    400 ; X64-NEXT:    kmovd %edi, %k1
    401 ; X64-NEXT:    vpunpcklwd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
    402 ; X64-NEXT:    retq
    403   %arg0 = bitcast i32 %a0 to <32 x i1>
    404   %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
    405   %arg2 = bitcast <8 x i64> %a2 to <32 x i16>
    406   %res0 = shufflevector <32 x i16> %arg1, <32 x i16> %arg2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59>
    407   %res1 = select <32 x i1> %arg0, <32 x i16> %res0, <32 x i16> zeroinitializer
    408   %res2 = bitcast <32 x i16> %res1 to <8 x i64>
    409   ret <8 x i64> %res2
    410 }
    411 
    412 !0 = !{i32 1}
    413 
    414