; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop  | FileCheck %s --check-prefix=X64-AVX --check-prefix=X64-XOP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64-AVX --check-prefix=X64-AVX2

;
; PowOf2 (uniform)
;
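; A multiply by a splatted power-of-two constant is expected to lower to a
; single vector shift-left by log2 of the constant (e.g. psllq/pslld/psllw $3
; for a multiply by 8). There is no byte shift, so the v16i8 case shifts words
; and masks away the bits that crossed byte boundaries.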

define <2 x i64> @mul_v2i64_8(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_8:
; X86:       # %bb.0:
; X86-NEXT:    psllq $3, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v2i64_8:
; X64:       # %bb.0:
; X64-NEXT:    psllq $3, %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v2i64_8:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsllq $3, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <2 x i64> %a0, <i64 8, i64 8>
  ret <2 x i64> %1
}

define <4 x i32> @mul_v4i32_8(<4 x i32> %a0) nounwind {
; X86-LABEL: mul_v4i32_8:
; X86:       # %bb.0:
; X86-NEXT:    pslld $3, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v4i32_8:
; X64:       # %bb.0:
; X64-NEXT:    pslld $3, %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v4i32_8:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpslld $3, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <4 x i32> %a0, <i32 8, i32 8, i32 8, i32 8>
  ret <4 x i32> %1
}

define <8 x i16> @mul_v8i16_8(<8 x i16> %a0) nounwind {
; X86-LABEL: mul_v8i16_8:
; X86:       # %bb.0:
; X86-NEXT:    psllw $3, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v8i16_8:
; X64:       # %bb.0:
; X64-NEXT:    psllw $3, %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v8i16_8:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsllw $3, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %1
}

define <16 x i8> @mul_v16i8_32(<16 x i8> %a0) nounwind {
; X86-LABEL: mul_v16i8_32:
; X86:       # %bb.0:
; X86-NEXT:    psllw $5, %xmm0
; X86-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v16i8_32:
; X64:       # %bb.0:
; X64-NEXT:    psllw $5, %xmm0
; X64-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
;
; X64-XOP-LABEL: mul_v16i8_32:
; X64-XOP:       # %bb.0:
; X64-XOP-NEXT:    vpshlb {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT:    retq
;
; X64-AVX2-LABEL: mul_v16i8_32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsllw $5, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = mul <16 x i8> %a0, <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>
  ret <16 x i8> %1
}

;
; PowOf2 (non-uniform)
;
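; With distinct power-of-two elements the expected lowering is a per-element
; shift where the target has one (XOP vpshl*, AVX2 vpsllv*); otherwise the SSE
; code falls back to shift-and-blend for v2i64 and to pmulld/pmullw for the
; narrower element types.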

define <2 x i64> @mul_v2i64_32_8(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_32_8:
; X86:       # %bb.0:
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psllq $3, %xmm1
; X86-NEXT:    psllq $5, %xmm0
; X86-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X86-NEXT:    retl
;
; X64-LABEL: mul_v2i64_32_8:
; X64:       # %bb.0:
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psllq $3, %xmm1
; X64-NEXT:    psllq $5, %xmm0
; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X64-NEXT:    retq
;
; X64-XOP-LABEL: mul_v2i64_32_8:
; X64-XOP:       # %bb.0:
; X64-XOP-NEXT:    vpshlq {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT:    retq
;
; X64-AVX2-LABEL: mul_v2i64_32_8:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsllvq {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = mul <2 x i64> %a0, <i64 32, i64 8>
  ret <2 x i64> %1
}

define <4 x i32> @mul_v4i32_1_2_4_8(<4 x i32> %a0) nounwind {
; X86-LABEL: mul_v4i32_1_2_4_8:
; X86:       # %bb.0:
; X86-NEXT:    pmulld {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v4i32_1_2_4_8:
; X64:       # %bb.0:
; X64-NEXT:    pmulld {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
;
; X64-XOP-LABEL: mul_v4i32_1_2_4_8:
; X64-XOP:       # %bb.0:
; X64-XOP-NEXT:    vpshld {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT:    retq
;
; X64-AVX2-LABEL: mul_v4i32_1_2_4_8:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = mul <4 x i32> %a0, <i32 1, i32 2, i32 4, i32 8>
  ret <4 x i32> %1
}

define <8 x i16> @mul_v8i16_1_2_4_8_16_32_64_128(<8 x i16> %a0) nounwind {
; X86-LABEL: mul_v8i16_1_2_4_8_16_32_64_128:
; X86:       # %bb.0:
; X86-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v8i16_1_2_4_8_16_32_64_128:
; X64:       # %bb.0:
; X64-NEXT:    pmullw {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
;
; X64-XOP-LABEL: mul_v8i16_1_2_4_8_16_32_64_128:
; X64-XOP:       # %bb.0:
; X64-XOP-NEXT:    vpshlw {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT:    retq
;
; X64-AVX2-LABEL: mul_v8i16_1_2_4_8_16_32_64_128:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = mul <8 x i16> %a0, <i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128>
  ret <8 x i16> %1
}

define <16 x i8> @mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8(<16 x i8> %a0) nounwind {
; X86-LABEL: mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,1,2,4,8,1,2,4,8,1,2,4,8]
; X86-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT:    pmullw %xmm2, %xmm0
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; X86-NEXT:    pand %xmm2, %xmm0
; X86-NEXT:    pmullw {{\.LCPI.*}}, %xmm1
; X86-NEXT:    pand %xmm2, %xmm1
; X86-NEXT:    packuswb %xmm0, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,1,2,4,8,1,2,4,8,1,2,4,8]
; X64-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT:    pmullw %xmm2, %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; X64-NEXT:    pand %xmm2, %xmm0
; X64-NEXT:    pmullw {{.*}}(%rip), %xmm1
; X64-NEXT:    pand %xmm2, %xmm1
; X64-NEXT:    packuswb %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; X64-XOP-LABEL: mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8:
; X64-XOP:       # %bb.0:
; X64-XOP-NEXT:    vpshlb {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT:    retq
;
; X64-AVX2-LABEL: mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
; X64-AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X64-AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %1 = mul <16 x i8> %a0, <i8 1, i8 2, i8 4, i8 8, i8 1, i8 2, i8 4, i8 8, i8 1, i8 2, i8 4, i8 8, i8 1, i8 2, i8 4, i8 8>
  ret <16 x i8> %1
}

;
; PowOf2 + 1 (uniform)
;
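; Constants of the form 2^N + 1 currently go through the generic multiply
; lowering (pmulld/pmullw, or the pmuludq/shift/add decomposition for v2i64)
; rather than a shift-and-add sequence.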

define <2 x i64> @mul_v2i64_17(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_17:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [17,0,17,0]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pmuludq %xmm1, %xmm2
; X86-NEXT:    psrlq $32, %xmm0
; X86-NEXT:    pmuludq %xmm1, %xmm0
; X86-NEXT:    psllq $32, %xmm0
; X86-NEXT:    paddq %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v2i64_17:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [17,17]
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    pmuludq %xmm1, %xmm2
; X64-NEXT:    psrlq $32, %xmm0
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    psllq $32, %xmm0
; X64-NEXT:    paddq %xmm2, %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v2i64_17:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [17,17]
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <2 x i64> %a0, <i64 17, i64 17>
  ret <2 x i64> %1
}

define <4 x i32> @mul_v4i32_17(<4 x i32> %a0) nounwind {
; X86-LABEL: mul_v4i32_17:
; X86:       # %bb.0:
; X86-NEXT:    pmulld {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v4i32_17:
; X64:       # %bb.0:
; X64-NEXT:    pmulld {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
;
; X64-XOP-LABEL: mul_v4i32_17:
; X64-XOP:       # %bb.0:
; X64-XOP-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT:    retq
;
; X64-AVX2-LABEL: mul_v4i32_17:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
; X64-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = mul <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %1
}

define <8 x i16> @mul_v8i16_17(<8 x i16> %a0) nounwind {
; X86-LABEL: mul_v8i16_17:
; X86:       # %bb.0:
; X86-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v8i16_17:
; X64:       # %bb.0:
; X64-NEXT:    pmullw {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v8i16_17:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <8 x i16> %a0, <i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17>
  ret <8 x i16> %1
}

define <16 x i8> @mul_v16i8_17(<16 x i8> %a0) nounwind {
; X86-LABEL: mul_v16i8_17:
; X86:       # %bb.0:
; X86-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17]
; X86-NEXT:    pmullw %xmm2, %xmm0
; X86-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; X86-NEXT:    pand %xmm3, %xmm0
; X86-NEXT:    pmullw %xmm2, %xmm1
; X86-NEXT:    pand %xmm3, %xmm1
; X86-NEXT:    packuswb %xmm0, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v16i8_17:
; X64:       # %bb.0:
; X64-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17]
; X64-NEXT:    pmullw %xmm2, %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; X64-NEXT:    pand %xmm3, %xmm0
; X64-NEXT:    pmullw %xmm2, %xmm1
; X64-NEXT:    pand %xmm3, %xmm1
; X64-NEXT:    packuswb %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; X64-XOP-LABEL: mul_v16i8_17:
; X64-XOP:       # %bb.0:
; X64-XOP-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-XOP-NEXT:    vmovdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17]
; X64-XOP-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
; X64-XOP-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-XOP-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
; X64-XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],xmm1[0,2,4,6,8,10,12,14]
; X64-XOP-NEXT:    retq
;
; X64-AVX2-LABEL: mul_v16i8_17:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
; X64-AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X64-AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %1 = mul <16 x i8> %a0, <i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17>
  ret <16 x i8> %1
}

;
; PowOf2 + 1 (non-uniform)
;
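; Mixed 2^N + 1 elements likewise use the generic per-type multiply lowering;
; no per-element shift-and-add is formed.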

define <2 x i64> @mul_v2i64_17_65(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_17_65:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [17,0,65,0]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pmuludq %xmm1, %xmm2
; X86-NEXT:    psrlq $32, %xmm0
; X86-NEXT:    pmuludq %xmm1, %xmm0
; X86-NEXT:    psllq $32, %xmm0
; X86-NEXT:    paddq %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v2i64_17_65:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [17,65]
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    pmuludq %xmm1, %xmm2
; X64-NEXT:    psrlq $32, %xmm0
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    psllq $32, %xmm0
; X64-NEXT:    paddq %xmm2, %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v2i64_17_65:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [17,65]
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <2 x i64> %a0, <i64 17, i64 65>
  ret <2 x i64> %1
}

define <4 x i32> @mul_v4i32_5_17_33_65(<4 x i32> %a0) nounwind {
; X86-LABEL: mul_v4i32_5_17_33_65:
; X86:       # %bb.0:
; X86-NEXT:    pmulld {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v4i32_5_17_33_65:
; X64:       # %bb.0:
; X64-NEXT:    pmulld {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v4i32_5_17_33_65:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <4 x i32> %a0, <i32 5, i32 17, i32 33, i32 65>
  ret <4 x i32> %1
}

define <8 x i16> @mul_v8i16_2_3_9_17_33_65_129_257(<8 x i16> %a0) nounwind {
; X86-LABEL: mul_v8i16_2_3_9_17_33_65_129_257:
; X86:       # %bb.0:
; X86-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v8i16_2_3_9_17_33_65_129_257:
; X64:       # %bb.0:
; X64-NEXT:    pmullw {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v8i16_2_3_9_17_33_65_129_257:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <8 x i16> %a0, <i16 2, i16 3, i16 9, i16 17, i16 33, i16 65, i16 129, i16 257>
  ret <8 x i16> %1
}

define <16 x i8> @mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3(<16 x i8> %a0) nounwind {
; X86-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [2,3,9,17,33,65,129,2,3,9,17,33,65,129,2,3]
; X86-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT:    pmullw %xmm2, %xmm0
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; X86-NEXT:    pand %xmm2, %xmm0
; X86-NEXT:    pmullw {{\.LCPI.*}}, %xmm1
; X86-NEXT:    pand %xmm2, %xmm1
; X86-NEXT:    packuswb %xmm0, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm2 = [2,3,9,17,33,65,129,2,3,9,17,33,65,129,2,3]
; X64-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT:    pmullw %xmm2, %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; X64-NEXT:    pand %xmm2, %xmm0
; X64-NEXT:    pmullw {{.*}}(%rip), %xmm1
; X64-NEXT:    pand %xmm2, %xmm1
; X64-NEXT:    packuswb %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; X64-XOP-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3:
; X64-XOP:       # %bb.0:
; X64-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [2,3,9,17,33,65,129,2,3,9,17,33,65,129,2,3]
; X64-XOP-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-XOP-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-XOP-NEXT:    vpmullw %xmm1, %xmm2, %xmm1
; X64-XOP-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-XOP-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],xmm1[0,2,4,6,8,10,12,14]
; X64-XOP-NEXT:    retq
;
; X64-AVX2-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
; X64-AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X64-AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %1 = mul <16 x i8> %a0, <i8 2, i8 3, i8 9, i8 17, i8 33, i8 65, i8 129, i8 2, i8 3, i8 9, i8 17, i8 33, i8 65, i8 129, i8 2, i8 3>
  ret <16 x i8> %1
}

;
; PowOf2 - 1 (uniform)
;
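; Constants of the form 2^N - 1 also use the generic multiply lowering (no
; shift-and-subtract), including the widen-to-i16 pmullw sequence for v16i8.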

define <2 x i64> @mul_v2i64_7(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_7:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [7,0,7,0]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pmuludq %xmm1, %xmm2
; X86-NEXT:    psrlq $32, %xmm0
; X86-NEXT:    pmuludq %xmm1, %xmm0
; X86-NEXT:    psllq $32, %xmm0
; X86-NEXT:    paddq %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v2i64_7:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [7,7]
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    pmuludq %xmm1, %xmm2
; X64-NEXT:    psrlq $32, %xmm0
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    psllq $32, %xmm0
; X64-NEXT:    paddq %xmm2, %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v2i64_7:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7]
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <2 x i64> %a0, <i64 7, i64 7>
  ret <2 x i64> %1
}

define <4 x i32> @mul_v4i32_7(<4 x i32> %a0) nounwind {
; X86-LABEL: mul_v4i32_7:
; X86:       # %bb.0:
; X86-NEXT:    pmulld {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v4i32_7:
; X64:       # %bb.0:
; X64-NEXT:    pmulld {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
;
; X64-XOP-LABEL: mul_v4i32_7:
; X64-XOP:       # %bb.0:
; X64-XOP-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT:    retq
;
; X64-AVX2-LABEL: mul_v4i32_7:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = mul <4 x i32> %a0, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %1
}

define <8 x i16> @mul_v8i16_7(<8 x i16> %a0) nounwind {
; X86-LABEL: mul_v8i16_7:
; X86:       # %bb.0:
; X86-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v8i16_7:
; X64:       # %bb.0:
; X64-NEXT:    pmullw {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v8i16_7:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <8 x i16> %a0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <8 x i16> %1
}

define <16 x i8> @mul_v16i8_31(<16 x i8> %a0) nounwind {
; X86-LABEL: mul_v16i8_31:
; X86:       # %bb.0:
; X86-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; X86-NEXT:    pmullw %xmm2, %xmm0
; X86-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; X86-NEXT:    pand %xmm3, %xmm0
; X86-NEXT:    pmullw %xmm2, %xmm1
; X86-NEXT:    pand %xmm3, %xmm1
; X86-NEXT:    packuswb %xmm0, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v16i8_31:
; X64:       # %bb.0:
; X64-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT:    movdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; X64-NEXT:    pmullw %xmm2, %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; X64-NEXT:    pand %xmm3, %xmm0
; X64-NEXT:    pmullw %xmm2, %xmm1
; X64-NEXT:    pand %xmm3, %xmm1
; X64-NEXT:    packuswb %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; X64-XOP-LABEL: mul_v16i8_31:
; X64-XOP:       # %bb.0:
; X64-XOP-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-XOP-NEXT:    vmovdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; X64-XOP-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
; X64-XOP-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-XOP-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
; X64-XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],xmm1[0,2,4,6,8,10,12,14]
; X64-XOP-NEXT:    retq
;
; X64-AVX2-LABEL: mul_v16i8_31:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
; X64-AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X64-AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %1 = mul <16 x i8> %a0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31>
  ret <16 x i8> %1
}

;
; PowOf2 - 1 (non-uniform)
;
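; The non-uniform and negative constants below mainly exercise the v2i64 path,
; which is decomposed into 32-bit pmuludq partial products combined with
; shifts and adds.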

define <2 x i64> @mul_v2i64_15_63(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_15_63:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [15,0,63,0]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pmuludq %xmm1, %xmm2
; X86-NEXT:    psrlq $32, %xmm0
; X86-NEXT:    pmuludq %xmm1, %xmm0
; X86-NEXT:    psllq $32, %xmm0
; X86-NEXT:    paddq %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v2i64_15_63:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [15,63]
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    pmuludq %xmm1, %xmm2
; X64-NEXT:    psrlq $32, %xmm0
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    psllq $32, %xmm0
; X64-NEXT:    paddq %xmm2, %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v2i64_15_63:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,63]
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <2 x i64> %a0, <i64 15, i64 63>
  ret <2 x i64> %1
}

define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_neg_15_63:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [4294967281,4294967295,4294967233,4294967295]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pmuludq %xmm1, %xmm2
; X86-NEXT:    movdqa %xmm0, %xmm3
; X86-NEXT:    psrlq $32, %xmm3
; X86-NEXT:    pmuludq %xmm1, %xmm3
; X86-NEXT:    pmuludq {{\.LCPI.*}}, %xmm0
; X86-NEXT:    paddq %xmm3, %xmm0
; X86-NEXT:    psllq $32, %xmm0
; X86-NEXT:    paddq %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v2i64_neg_15_63:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [18446744073709551601,18446744073709551553]
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    pmuludq %xmm1, %xmm2
; X64-NEXT:    movdqa %xmm0, %xmm3
; X64-NEXT:    psrlq $32, %xmm3
; X64-NEXT:    pmuludq %xmm1, %xmm3
; X64-NEXT:    pmuludq {{.*}}(%rip), %xmm0
; X64-NEXT:    paddq %xmm3, %xmm0
; X64-NEXT:    psllq $32, %xmm0
; X64-NEXT:    paddq %xmm2, %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v2i64_neg_15_63:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709551601,18446744073709551553]
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT:    vpsrlq $32, %xmm0, %xmm3
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
; X64-AVX-NEXT:    vpmuludq {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <2 x i64> %a0, <i64 -15, i64 -63>
  ret <2 x i64> %1
}

define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_neg_17_65:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [4294967279,4294967295,4294967231,4294967295]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pmuludq %xmm1, %xmm2
; X86-NEXT:    movdqa %xmm0, %xmm3
; X86-NEXT:    psrlq $32, %xmm3
; X86-NEXT:    pmuludq %xmm1, %xmm3
; X86-NEXT:    pmuludq {{\.LCPI.*}}, %xmm0
; X86-NEXT:    paddq %xmm3, %xmm0
; X86-NEXT:    psllq $32, %xmm0
; X86-NEXT:    paddq %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v2i64_neg_17_65:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [18446744073709551599,18446744073709551551]
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    pmuludq %xmm1, %xmm2
; X64-NEXT:    movdqa %xmm0, %xmm3
; X64-NEXT:    psrlq $32, %xmm3
; X64-NEXT:    pmuludq %xmm1, %xmm3
; X64-NEXT:    pmuludq {{.*}}(%rip), %xmm0
; X64-NEXT:    paddq %xmm3, %xmm0
; X64-NEXT:    psllq $32, %xmm0
; X64-NEXT:    paddq %xmm2, %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v2i64_neg_17_65:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709551599,18446744073709551551]
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT:    vpsrlq $32, %xmm0, %xmm3
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
; X64-AVX-NEXT:    vpmuludq {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <2 x i64> %a0, <i64 -17, i64 -65>
  ret <2 x i64> %1
}

define <2 x i64> @mul_v2i64_0_1(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_0_1:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [0,0,1,0]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pmuludq %xmm1, %xmm2
; X86-NEXT:    psrlq $32, %xmm0
; X86-NEXT:    pmuludq %xmm1, %xmm0
; X86-NEXT:    psllq $32, %xmm0
; X86-NEXT:    paddq %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v2i64_0_1:
; X64:       # %bb.0:
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    movq %rax, %xmm1
; X64-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    pmuludq %xmm1, %xmm2
; X64-NEXT:    psrlq $32, %xmm0
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    psllq $32, %xmm0
; X64-NEXT:    paddq %xmm2, %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v2i64_0_1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl $1, %eax
; X64-AVX-NEXT:    vmovq %rax, %xmm1
; X64-AVX-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <2 x i64> %a0, <i64 0, i64 1>
  ret <2 x i64> %1
}

define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_neg_0_1:
; X86:       # %bb.0:
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlq $32, %xmm1
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [0,0,4294967295,4294967295]
; X86-NEXT:    pmuludq %xmm2, %xmm1
; X86-NEXT:    movdqa %xmm2, %xmm3
; X86-NEXT:    psrlq $32, %xmm3
; X86-NEXT:    pmuludq %xmm0, %xmm3
; X86-NEXT:    paddq %xmm1, %xmm3
; X86-NEXT:    psllq $32, %xmm3
; X86-NEXT:    pmuludq %xmm2, %xmm0
; X86-NEXT:    paddq %xmm3, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v2i64_neg_0_1:
; X64:       # %bb.0:
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrlq $32, %xmm1
; X64-NEXT:    movq $-1, %rax
; X64-NEXT:    movq %rax, %xmm2
; X64-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
; X64-NEXT:    pmuludq %xmm2, %xmm1
; X64-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NEXT:    movq %rax, %xmm3
; X64-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
; X64-NEXT:    pmuludq %xmm0, %xmm3
; X64-NEXT:    paddq %xmm1, %xmm3
; X64-NEXT:    psllq $32, %xmm3
; X64-NEXT:    pmuludq %xmm2, %xmm0
; X64-NEXT:    paddq %xmm3, %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v2i64_neg_0_1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrlq $32, %xmm0, %xmm1
; X64-AVX-NEXT:    movq $-1, %rax
; X64-AVX-NEXT:    vmovq %rax, %xmm2
; X64-AVX-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
; X64-AVX-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; X64-AVX-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-AVX-NEXT:    vmovq %rax, %xmm3
; X64-AVX-NEXT:    vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
; X64-AVX-NEXT:    vpmuludq %xmm3, %xmm0, %xmm3
; X64-AVX-NEXT:    vpaddq %xmm1, %xmm3, %xmm1
; X64-AVX-NEXT:    vpsllq $32, %xmm1, %xmm1
; X64-AVX-NEXT:    vpmuludq %xmm2, %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <2 x i64> %a0, <i64 0, i64 -1>
  ret <2 x i64> %1
}

define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_15_neg_63:
; X86:       # %bb.0:
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlq $32, %xmm1
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [15,0,4294967233,4294967295]
; X86-NEXT:    pmuludq %xmm2, %xmm1
; X86-NEXT:    movdqa %xmm2, %xmm3
; X86-NEXT:    psrlq $32, %xmm3
; X86-NEXT:    pmuludq %xmm0, %xmm3
; X86-NEXT:    paddq %xmm1, %xmm3
; X86-NEXT:    psllq $32, %xmm3
; X86-NEXT:    pmuludq %xmm2, %xmm0
; X86-NEXT:    paddq %xmm3, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v2i64_15_neg_63:
; X64:       # %bb.0:
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrlq $32, %xmm1
; X64-NEXT:    movdqa {{.*#+}} xmm2 = [15,18446744073709551553]
; X64-NEXT:    pmuludq %xmm2, %xmm1
; X64-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NEXT:    movq %rax, %xmm3
; X64-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
; X64-NEXT:    pmuludq %xmm0, %xmm3
; X64-NEXT:    paddq %xmm1, %xmm3
; X64-NEXT:    psllq $32, %xmm3
; X64-NEXT:    pmuludq %xmm2, %xmm0
; X64-NEXT:    paddq %xmm3, %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v2i64_15_neg_63:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrlq $32, %xmm0, %xmm1
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,18446744073709551553]
; X64-AVX-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; X64-AVX-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-AVX-NEXT:    vmovq %rax, %xmm3
; X64-AVX-NEXT:    vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
; X64-AVX-NEXT:    vpmuludq %xmm3, %xmm0, %xmm3
; X64-AVX-NEXT:    vpaddq %xmm1, %xmm3, %xmm1
; X64-AVX-NEXT:    vpsllq $32, %xmm1, %xmm1
; X64-AVX-NEXT:    vpmuludq %xmm2, %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <2 x i64> %a0, <i64 15, i64 -63>
  ret <2 x i64> %1
}

define <4 x i32> @mul_v4i32_0_15_31_7(<4 x i32> %a0) nounwind {
; X86-LABEL: mul_v4i32_0_15_31_7:
; X86:       # %bb.0:
; X86-NEXT:    pmulld {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v4i32_0_15_31_7:
; X64:       # %bb.0:
; X64-NEXT:    pmulld {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v4i32_0_15_31_7:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <4 x i32> %a0, <i32 0, i32 15, i32 31, i32 7>
  ret <4 x i32> %1
}

define <8 x i16> @mul_v8i16_0_1_7_15_31_63_127_255(<8 x i16> %a0) nounwind {
; X86-LABEL: mul_v8i16_0_1_7_15_31_63_127_255:
; X86:       # %bb.0:
; X86-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v8i16_0_1_7_15_31_63_127_255:
; X64:       # %bb.0:
; X64-NEXT:    pmullw {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v8i16_0_1_7_15_31_63_127_255:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %1 = mul <8 x i16> %a0, <i16 0, i16 1, i16 7, i16 15, i16 31, i16 63, i16 127, i16 255>
  ret <8 x i16> %1
}

define <16 x i8> @mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127(<16 x i8> %a0) nounwind {
; X86-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,3,7,15,31,63,127,0,1,3,7,15,31,63,127]
; X86-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT:    pmullw %xmm2, %xmm0
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; X86-NEXT:    pand %xmm2, %xmm0
; X86-NEXT:    pmullw {{\.LCPI.*}}, %xmm1
; X86-NEXT:    pand %xmm2, %xmm1
; X86-NEXT:    packuswb %xmm0, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,3,7,15,31,63,127,0,1,3,7,15,31,63,127]
; X64-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT:    pmullw %xmm2, %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; X64-NEXT:    pand %xmm2, %xmm0
; X64-NEXT:    pmullw {{.*}}(%rip), %xmm1
; X64-NEXT:    pand %xmm2, %xmm1
; X64-NEXT:    packuswb %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; X64-XOP-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127:
; X64-XOP:       # %bb.0:
; X64-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,3,7,15,31,63,127,0,1,3,7,15,31,63,127]
; X64-XOP-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-XOP-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-XOP-NEXT:    vpmullw %xmm1, %xmm2, %xmm1
; X64-XOP-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-XOP-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],xmm1[0,2,4,6,8,10,12,14]
; X64-XOP-NEXT:    retq
;
; X64-AVX2-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
; X64-AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X64-AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %1 = mul <16 x i8> %a0, <i8 0, i8 1, i8 3, i8 7, i8 15, i8 31, i8 63, i8 127, i8 0, i8 1, i8 3, i8 7, i8 15, i8 31, i8 63, i8 127>
  ret <16 x i8> %1
}

define <2 x i64> @mul_v2i64_68_132(<2 x i64> %x) nounwind {
; X86-LABEL: mul_v2i64_68_132:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [68,0,132,0]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pmuludq %xmm1, %xmm2
; X86-NEXT:    psrlq $32, %xmm0
; X86-NEXT:    pmuludq %xmm1, %xmm0
; X86-NEXT:    psllq $32, %xmm0
; X86-NEXT:    paddq %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v2i64_68_132:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [68,132]
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    pmuludq %xmm1, %xmm2
; X64-NEXT:    psrlq $32, %xmm0
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    psllq $32, %xmm0
; X64-NEXT:    paddq %xmm2, %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v2i64_68_132:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [68,132]
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT:    retq
  %mul = mul <2 x i64> %x, <i64 68, i64 132>
  ret <2 x i64> %mul
}

define <2 x i64> @mul_v2i64_60_120(<2 x i64> %x) nounwind {
; X86-LABEL: mul_v2i64_60_120:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [60,0,124,0]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pmuludq %xmm1, %xmm2
; X86-NEXT:    psrlq $32, %xmm0
; X86-NEXT:    pmuludq %xmm1, %xmm0
; X86-NEXT:    psllq $32, %xmm0
; X86-NEXT:    paddq %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_v2i64_60_120:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [60,124]
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    pmuludq %xmm1, %xmm2
; X64-NEXT:    psrlq $32, %xmm0
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    psllq $32, %xmm0
; X64-NEXT:    paddq %xmm2, %xmm0
; X64-NEXT:    retq
;
; X64-AVX-LABEL: mul_v2i64_60_120:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [60,124]
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT:    retq
  %mul = mul <2 x i64> %x, <i64 60, i64 124>
  ret <2 x i64> %mul
}