; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop | FileCheck %s --check-prefix=X64-AVX --check-prefix=X64-XOP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64-AVX --check-prefix=X64-AVX2

;
; PowOf2 (uniform)
;

define <2 x i64> @mul_v2i64_8(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_8:
; X86: # %bb.0:
; X86-NEXT: psllq $3, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_8:
; X64: # %bb.0:
; X64-NEXT: psllq $3, %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v2i64_8:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpsllq $3, %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <2 x i64> %a0, <i64 8, i64 8>
  ret <2 x i64> %1
}

define <4 x i32> @mul_v4i32_8(<4 x i32> %a0) nounwind {
; X86-LABEL: mul_v4i32_8:
; X86: # %bb.0:
; X86-NEXT: pslld $3, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v4i32_8:
; X64: # %bb.0:
; X64-NEXT: pslld $3, %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v4i32_8:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpslld $3, %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <4 x i32> %a0, <i32 8, i32 8, i32 8, i32 8>
  ret <4 x i32> %1
}

define <8 x i16> @mul_v8i16_8(<8 x i16> %a0) nounwind {
; X86-LABEL: mul_v8i16_8:
; X86: # %bb.0:
; X86-NEXT: psllw $3, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v8i16_8:
; X64: # %bb.0:
; X64-NEXT: psllw $3, %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v8i16_8:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpsllw $3, %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %1
}

define <16 x i8> @mul_v16i8_32(<16 x i8> %a0) nounwind {
; X86-LABEL: mul_v16i8_32:
; X86: # %bb.0:
; X86-NEXT: psllw $5, %xmm0
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v16i8_32:
; X64: # %bb.0:
; X64-NEXT: psllw $5, %xmm0
; X64-NEXT: pand {{.*}}(%rip), %xmm0
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v16i8_32:
; X64-XOP: # %bb.0:
; X64-XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v16i8_32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpsllw $5, %xmm0, %xmm0
; X64-AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: retq
  %1 = mul <16 x i8> %a0, <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>
  ret <16 x i8> %1
}

;
; PowOf2 (non-uniform)
;

define <2 x i64> @mul_v2i64_32_8(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_32_8:
; X86: # %bb.0:
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psllq $3, %xmm1
; X86-NEXT: psllq $5, %xmm0
; X86-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_32_8:
; X64: # %bb.0:
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psllq $3, %xmm1
; X64-NEXT: psllq $5, %xmm0
; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v2i64_32_8:
; X64-XOP: # %bb.0:
; X64-XOP-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v2i64_32_8:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: retq
  %1 = mul <2 x i64> %a0, <i64 32, i64 8>
  ret <2 x i64> %1
}

define <4 x i32> @mul_v4i32_1_2_4_8(<4 x i32> %a0) nounwind {
; X86-LABEL: mul_v4i32_1_2_4_8:
; X86: # %bb.0:
; X86-NEXT: pmulld {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v4i32_1_2_4_8:
; X64: # %bb.0:
; X64-NEXT: pmulld {{.*}}(%rip), %xmm0
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v4i32_1_2_4_8:
; X64-XOP: # %bb.0:
; X64-XOP-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v4i32_1_2_4_8:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: retq
  %1 = mul <4 x i32> %a0, <i32 1, i32 2, i32 4, i32 8>
  ret <4 x i32> %1
}

define <8 x i16> @mul_v8i16_1_2_4_8_16_32_64_128(<8 x i16> %a0) nounwind {
; X86-LABEL: mul_v8i16_1_2_4_8_16_32_64_128:
; X86: # %bb.0:
; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v8i16_1_2_4_8_16_32_64_128:
; X64: # %bb.0:
; X64-NEXT: pmullw {{.*}}(%rip), %xmm0
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v8i16_1_2_4_8_16_32_64_128:
; X64-XOP: # %bb.0:
; X64-XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v8i16_1_2_4_8_16_32_64_128:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: retq
  %1 = mul <8 x i16> %a0, <i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128>
  ret <8 x i16> %1
}

define <16 x i8> @mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8(<16 x i8> %a0) nounwind {
; X86-LABEL: mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,1,2,4,8,1,2,4,8,1,2,4,8]
; X86-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT: pmullw %xmm2, %xmm0
; X86-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; X86-NEXT: pand %xmm2, %xmm0
; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm1
; X86-NEXT: pand %xmm2, %xmm1
; X86-NEXT: packuswb %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,1,2,4,8,1,2,4,8,1,2,4,8]
; X64-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT: pmullw %xmm2, %xmm0
; X64-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; X64-NEXT: pand %xmm2, %xmm0
; X64-NEXT: pmullw {{.*}}(%rip), %xmm1
; X64-NEXT: pand %xmm2, %xmm1
; X64-NEXT: packuswb %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8:
; X64-XOP: # %bb.0:
; X64-XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
; X64-AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X64-AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %1 = mul <16 x i8> %a0, <i8 1, i8 2, i8 4, i8 8, i8 1, i8 2, i8 4, i8 8, i8 1, i8 2, i8 4, i8 8, i8 1, i8 2, i8 4, i8 8>
  ret <16 x i8> %1
}

;
; PowOf2 + 1 (uniform)
;

define <2 x i64> @mul_v2i64_17(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_17:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [17,0,17,0]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: psrlq $32, %xmm0
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: psllq $32, %xmm0
; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_17:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [17,17]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: psrlq $32, %xmm0
; X64-NEXT: pmuludq %xmm1, %xmm0
; X64-NEXT: psllq $32, %xmm0
; X64-NEXT: paddq %xmm2, %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v2i64_17:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17]
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <2 x i64> %a0, <i64 17, i64 17>
  ret <2 x i64> %1
}

define <4 x i32> @mul_v4i32_17(<4 x i32> %a0) nounwind {
; X86-LABEL: mul_v4i32_17:
; X86: # %bb.0:
; X86-NEXT: pmulld {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v4i32_17:
; X64: # %bb.0:
; X64-NEXT: pmulld {{.*}}(%rip), %xmm0
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v4i32_17:
; X64-XOP: # %bb.0:
; X64-XOP-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v4i32_17:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
  %1 = mul <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %1
}

define <8 x i16> @mul_v8i16_17(<8 x i16> %a0) nounwind {
; X86-LABEL: mul_v8i16_17:
; X86: # %bb.0:
; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v8i16_17:
; X64: # %bb.0:
; X64-NEXT: pmullw {{.*}}(%rip), %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v8i16_17:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <8 x i16> %a0, <i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17>
  ret <8 x i16> %1
}

define <16 x i8> @mul_v16i8_17(<16 x i8> %a0) nounwind {
; X86-LABEL: mul_v16i8_17:
; X86: # %bb.0:
; X86-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT: movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17]
; X86-NEXT: pmullw %xmm2, %xmm0
; X86-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; X86-NEXT: pand %xmm3, %xmm0
; X86-NEXT: pmullw %xmm2, %xmm1
; X86-NEXT: pand %xmm3, %xmm1
; X86-NEXT: packuswb %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v16i8_17:
; X64: # %bb.0:
; X64-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT: movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17]
; X64-NEXT: pmullw %xmm2, %xmm0
; X64-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; X64-NEXT: pand %xmm3, %xmm0
; X64-NEXT: pmullw %xmm2, %xmm1
; X64-NEXT: pand %xmm3, %xmm1
; X64-NEXT: packuswb %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v16i8_17:
; X64-XOP: # %bb.0:
; X64-XOP-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17]
; X64-XOP-NEXT: vpmullw %xmm2, %xmm1, %xmm1
; X64-XOP-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-XOP-NEXT: vpmullw %xmm2, %xmm0, %xmm0
; X64-XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],xmm1[0,2,4,6,8,10,12,14]
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v16i8_17:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
; X64-AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X64-AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %1 = mul <16 x i8> %a0, <i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17>
  ret <16 x i8> %1
}

;
; PowOf2 + 1 (non-uniform)
;

define <2 x i64> @mul_v2i64_17_65(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_17_65:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [17,0,65,0]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: psrlq $32, %xmm0
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: psllq $32, %xmm0
; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_17_65:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [17,65]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: psrlq $32, %xmm0
; X64-NEXT: pmuludq %xmm1, %xmm0
; X64-NEXT: psllq $32, %xmm0
; X64-NEXT: paddq %xmm2, %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v2i64_17_65:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [17,65]
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <2 x i64> %a0, <i64 17, i64 65>
  ret <2 x i64> %1
}

define <4 x i32> @mul_v4i32_5_17_33_65(<4 x i32> %a0) nounwind {
; X86-LABEL: mul_v4i32_5_17_33_65:
; X86: # %bb.0:
; X86-NEXT: pmulld {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v4i32_5_17_33_65:
; X64: # %bb.0:
; X64-NEXT: pmulld {{.*}}(%rip), %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v4i32_5_17_33_65:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <4 x i32> %a0, <i32 5, i32 17, i32 33, i32 65>
  ret <4 x i32> %1
}

define <8 x i16> @mul_v8i16_2_3_9_17_33_65_129_257(<8 x i16> %a0) nounwind {
; X86-LABEL: mul_v8i16_2_3_9_17_33_65_129_257:
; X86: # %bb.0:
; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v8i16_2_3_9_17_33_65_129_257:
; X64: # %bb.0:
; X64-NEXT: pmullw {{.*}}(%rip), %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v8i16_2_3_9_17_33_65_129_257:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <8 x i16> %a0, <i16 2, i16 3, i16 9, i16 17, i16 33, i16 65, i16 129, i16 257>
  ret <8 x i16> %1
}

define <16 x i8> @mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3(<16 x i8> %a0) nounwind {
; X86-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm2 = [2,3,9,17,33,65,129,2,3,9,17,33,65,129,2,3]
; X86-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT: pmullw %xmm2, %xmm0
; X86-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; X86-NEXT: pand %xmm2, %xmm0
; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm1
; X86-NEXT: pand %xmm2, %xmm1
; X86-NEXT: packuswb %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm2 = [2,3,9,17,33,65,129,2,3,9,17,33,65,129,2,3]
; X64-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT: pmullw %xmm2, %xmm0
; X64-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; X64-NEXT: pand %xmm2, %xmm0
; X64-NEXT: pmullw {{.*}}(%rip), %xmm1
; X64-NEXT: pand %xmm2, %xmm1
; X64-NEXT: packuswb %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3:
; X64-XOP: # %bb.0:
; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [2,3,9,17,33,65,129,2,3,9,17,33,65,129,2,3]
; X64-XOP-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-XOP-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-XOP-NEXT: vpmullw %xmm1, %xmm2, %xmm1
; X64-XOP-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-XOP-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],xmm1[0,2,4,6,8,10,12,14]
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
; X64-AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X64-AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %1 = mul <16 x i8> %a0, <i8 2, i8 3, i8 9, i8 17, i8 33, i8 65, i8 129, i8 2, i8 3, i8 9, i8 17, i8 33, i8 65, i8 129, i8 2, i8 3>
  ret <16 x i8> %1
}

;
; PowOf2 - 1 (uniform)
;

define <2 x i64> @mul_v2i64_7(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_7:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [7,0,7,0]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: psrlq $32, %xmm0
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: psllq $32, %xmm0
; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_7:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [7,7]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: psrlq $32, %xmm0
; X64-NEXT: pmuludq %xmm1, %xmm0
; X64-NEXT: psllq $32, %xmm0
; X64-NEXT: paddq %xmm2, %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v2i64_7:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7]
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <2 x i64> %a0, <i64 7, i64 7>
  ret <2 x i64> %1
}

define <4 x i32> @mul_v4i32_7(<4 x i32> %a0) nounwind {
; X86-LABEL: mul_v4i32_7:
; X86: # %bb.0:
; X86-NEXT: pmulld {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v4i32_7:
; X64: # %bb.0:
; X64-NEXT: pmulld {{.*}}(%rip), %xmm0
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v4i32_7:
; X64-XOP: # %bb.0:
; X64-XOP-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v4i32_7:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
  %1 = mul <4 x i32> %a0, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %1
}

define <8 x i16> @mul_v8i16_7(<8 x i16> %a0) nounwind {
; X86-LABEL: mul_v8i16_7:
; X86: # %bb.0:
; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v8i16_7:
; X64: # %bb.0:
; X64-NEXT: pmullw {{.*}}(%rip), %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v8i16_7:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <8 x i16> %a0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <8 x i16> %1
}

define <16 x i8> @mul_v16i8_31(<16 x i8> %a0) nounwind {
; X86-LABEL: mul_v16i8_31:
; X86: # %bb.0:
; X86-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT: movdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; X86-NEXT: pmullw %xmm2, %xmm0
; X86-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; X86-NEXT: pand %xmm3, %xmm0
; X86-NEXT: pmullw %xmm2, %xmm1
; X86-NEXT: pand %xmm3, %xmm1
; X86-NEXT: packuswb %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v16i8_31:
; X64: # %bb.0:
; X64-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT: movdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; X64-NEXT: pmullw %xmm2, %xmm0
; X64-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; X64-NEXT: pand %xmm3, %xmm0
; X64-NEXT: pmullw %xmm2, %xmm1
; X64-NEXT: pand %xmm3, %xmm1
; X64-NEXT: packuswb %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v16i8_31:
; X64-XOP: # %bb.0:
; X64-XOP-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; X64-XOP-NEXT: vpmullw %xmm2, %xmm1, %xmm1
; X64-XOP-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-XOP-NEXT: vpmullw %xmm2, %xmm0, %xmm0
; X64-XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],xmm1[0,2,4,6,8,10,12,14]
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v16i8_31:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
; X64-AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X64-AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %1 = mul <16 x i8> %a0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31>
  ret <16 x i8> %1
}

;
; PowOf2 - 1 (non-uniform)
;

define <2 x i64> @mul_v2i64_15_63(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_15_63:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [15,0,63,0]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: psrlq $32, %xmm0
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: psllq $32, %xmm0
; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_15_63:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [15,63]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: psrlq $32, %xmm0
; X64-NEXT: pmuludq %xmm1, %xmm0
; X64-NEXT: psllq $32, %xmm0
; X64-NEXT: paddq %xmm2, %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v2i64_15_63:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,63]
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <2 x i64> %a0, <i64 15, i64 63>
  ret <2 x i64> %1
}

define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_neg_15_63:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [4294967281,4294967295,4294967233,4294967295]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: movdqa %xmm0, %xmm3
; X86-NEXT: psrlq $32, %xmm3
; X86-NEXT: pmuludq %xmm1, %xmm3
; X86-NEXT: pmuludq {{\.LCPI.*}}, %xmm0
; X86-NEXT: paddq %xmm3, %xmm0
; X86-NEXT: psllq $32, %xmm0
; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_neg_15_63:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551601,18446744073709551553]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: movdqa %xmm0, %xmm3
; X64-NEXT: psrlq $32, %xmm3
; X64-NEXT: pmuludq %xmm1, %xmm3
; X64-NEXT: pmuludq {{.*}}(%rip), %xmm0
; X64-NEXT: paddq %xmm3, %xmm0
; X64-NEXT: psllq $32, %xmm0
; X64-NEXT: paddq %xmm2, %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v2i64_neg_15_63:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551601,18446744073709551553]
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm3
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <2 x i64> %a0, <i64 -15, i64 -63>
  ret <2 x i64> %1
}

define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_neg_17_65:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [4294967279,4294967295,4294967231,4294967295]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: movdqa %xmm0, %xmm3
; X86-NEXT: psrlq $32, %xmm3
; X86-NEXT: pmuludq %xmm1, %xmm3
; X86-NEXT: pmuludq {{\.LCPI.*}}, %xmm0
; X86-NEXT: paddq %xmm3, %xmm0
; X86-NEXT: psllq $32, %xmm0
; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_neg_17_65:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551599,18446744073709551551]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: movdqa %xmm0, %xmm3
; X64-NEXT: psrlq $32, %xmm3
; X64-NEXT: pmuludq %xmm1, %xmm3
; X64-NEXT: pmuludq {{.*}}(%rip), %xmm0
; X64-NEXT: paddq %xmm3, %xmm0
; X64-NEXT: psllq $32, %xmm0
; X64-NEXT: paddq %xmm2, %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v2i64_neg_17_65:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551599,18446744073709551551]
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm3
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <2 x i64> %a0, <i64 -17, i64 -65>
  ret <2 x i64> %1
}

define <2 x i64> @mul_v2i64_0_1(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_0_1:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [0,0,1,0]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: psrlq $32, %xmm0
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: psllq $32, %xmm0
; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_0_1:
; X64: # %bb.0:
; X64-NEXT: movl $1, %eax
; X64-NEXT: movq %rax, %xmm1
; X64-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: psrlq $32, %xmm0
; X64-NEXT: pmuludq %xmm1, %xmm0
; X64-NEXT: psllq $32, %xmm0
; X64-NEXT: paddq %xmm2, %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v2i64_0_1:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl $1, %eax
; X64-AVX-NEXT: vmovq %rax, %xmm1
; X64-AVX-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <2 x i64> %a0, <i64 0, i64 1>
  ret <2 x i64> %1
}

define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_neg_0_1:
; X86: # %bb.0:
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrlq $32, %xmm1
; X86-NEXT: movdqa {{.*#+}} xmm2 = [0,0,4294967295,4294967295]
; X86-NEXT: pmuludq %xmm2, %xmm1
; X86-NEXT: movdqa %xmm2, %xmm3
; X86-NEXT: psrlq $32, %xmm3
; X86-NEXT: pmuludq %xmm0, %xmm3
; X86-NEXT: paddq %xmm1, %xmm3
; X86-NEXT: psllq $32, %xmm3
; X86-NEXT: pmuludq %xmm2, %xmm0
; X86-NEXT: paddq %xmm3, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_neg_0_1:
; X64: # %bb.0:
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlq $32, %xmm1
; X64-NEXT: movq $-1, %rax
; X64-NEXT: movq %rax, %xmm2
; X64-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
; X64-NEXT: pmuludq %xmm2, %xmm1
; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NEXT: movq %rax, %xmm3
; X64-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
; X64-NEXT: pmuludq %xmm0, %xmm3
; X64-NEXT: paddq %xmm1, %xmm3
; X64-NEXT: psllq $32, %xmm3
; X64-NEXT: pmuludq %xmm2, %xmm0
; X64-NEXT: paddq %xmm3, %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v2i64_neg_0_1:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm1
; X64-AVX-NEXT: movq $-1, %rax
; X64-AVX-NEXT: vmovq %rax, %xmm2
; X64-AVX-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
; X64-AVX-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; X64-AVX-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-AVX-NEXT: vmovq %rax, %xmm3
; X64-AVX-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
; X64-AVX-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; X64-AVX-NEXT: vpaddq %xmm1, %xmm3, %xmm1
; X64-AVX-NEXT: vpsllq $32, %xmm1, %xmm1
; X64-AVX-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
; X64-AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <2 x i64> %a0, <i64 0, i64 -1>
  ret <2 x i64> %1
}

define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_15_neg_63:
; X86: # %bb.0:
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrlq $32, %xmm1
; X86-NEXT: movdqa {{.*#+}} xmm2 = [15,0,4294967233,4294967295]
; X86-NEXT: pmuludq %xmm2, %xmm1
; X86-NEXT: movdqa %xmm2, %xmm3
; X86-NEXT: psrlq $32, %xmm3
; X86-NEXT: pmuludq %xmm0, %xmm3
; X86-NEXT: paddq %xmm1, %xmm3
; X86-NEXT: psllq $32, %xmm3
; X86-NEXT: pmuludq %xmm2, %xmm0
; X86-NEXT: paddq %xmm3, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_15_neg_63:
; X64: # %bb.0:
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlq $32, %xmm1
; X64-NEXT: movdqa {{.*#+}} xmm2 = [15,18446744073709551553]
; X64-NEXT: pmuludq %xmm2, %xmm1
; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NEXT: movq %rax, %xmm3
; X64-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
; X64-NEXT: pmuludq %xmm0, %xmm3
; X64-NEXT: paddq %xmm1, %xmm3
; X64-NEXT: psllq $32, %xmm3
; X64-NEXT: pmuludq %xmm2, %xmm0
; X64-NEXT: paddq %xmm3, %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v2i64_15_neg_63:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm1
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,18446744073709551553]
; X64-AVX-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; X64-AVX-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-AVX-NEXT: vmovq %rax, %xmm3
; X64-AVX-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
; X64-AVX-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; X64-AVX-NEXT: vpaddq %xmm1, %xmm3, %xmm1
; X64-AVX-NEXT: vpsllq $32, %xmm1, %xmm1
; X64-AVX-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
; X64-AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <2 x i64> %a0, <i64 15, i64 -63>
  ret <2 x i64> %1
}

define <4 x i32> @mul_v4i32_0_15_31_7(<4 x i32> %a0) nounwind {
; X86-LABEL: mul_v4i32_0_15_31_7:
; X86: # %bb.0:
; X86-NEXT: pmulld {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v4i32_0_15_31_7:
; X64: # %bb.0:
; X64-NEXT: pmulld {{.*}}(%rip), %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v4i32_0_15_31_7:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <4 x i32> %a0, <i32 0, i32 15, i32 31, i32 7>
  ret <4 x i32> %1
}

define <8 x i16> @mul_v8i16_0_1_7_15_31_63_127_255(<8 x i16> %a0) nounwind {
; X86-LABEL: mul_v8i16_0_1_7_15_31_63_127_255:
; X86: # %bb.0:
; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v8i16_0_1_7_15_31_63_127_255:
; X64: # %bb.0:
; X64-NEXT: pmullw {{.*}}(%rip), %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v8i16_0_1_7_15_31_63_127_255:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %1 = mul <8 x i16> %a0, <i16 0, i16 1, i16 7, i16 15, i16 31, i16 63, i16 127, i16 255>
  ret <8 x i16> %1
}

define <16 x i8> @mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127(<16 x i8> %a0) nounwind {
; X86-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm2 = [0,1,3,7,15,31,63,127,0,1,3,7,15,31,63,127]
; X86-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT: pmullw %xmm2, %xmm0
; X86-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; X86-NEXT: pand %xmm2, %xmm0
; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm1
; X86-NEXT: pand %xmm2, %xmm1
; X86-NEXT: packuswb %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm2 = [0,1,3,7,15,31,63,127,0,1,3,7,15,31,63,127]
; X64-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT: pmullw %xmm2, %xmm0
; X64-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; X64-NEXT: pand %xmm2, %xmm0
; X64-NEXT: pmullw {{.*}}(%rip), %xmm1
; X64-NEXT: pand %xmm2, %xmm1
; X64-NEXT: packuswb %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127:
; X64-XOP: # %bb.0:
; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,3,7,15,31,63,127,0,1,3,7,15,31,63,127]
; X64-XOP-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-XOP-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-XOP-NEXT: vpmullw %xmm1, %xmm2, %xmm1
; X64-XOP-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-XOP-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],xmm1[0,2,4,6,8,10,12,14]
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
; X64-AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X64-AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %1 = mul <16 x i8> %a0, <i8 0, i8 1, i8 3, i8 7, i8 15, i8 31, i8 63, i8 127, i8 0, i8 1, i8 3, i8 7, i8 15, i8 31, i8 63, i8 127>
  ret <16 x i8> %1
}

define <2 x i64> @mul_v2i64_68_132(<2 x i64> %x) nounwind {
; X86-LABEL: mul_v2i64_68_132:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [68,0,132,0]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: psrlq $32, %xmm0
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: psllq $32, %xmm0
; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_68_132:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [68,132]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: psrlq $32, %xmm0
; X64-NEXT: pmuludq %xmm1, %xmm0
; X64-NEXT: psllq $32, %xmm0
; X64-NEXT: paddq %xmm2, %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v2i64_68_132:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [68,132]
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT: retq
  %mul = mul <2 x i64> %x, <i64 68, i64 132>
  ret <2 x i64> %mul
}

define <2 x i64> @mul_v2i64_60_120(<2 x i64> %x) nounwind {
; X86-LABEL: mul_v2i64_60_120:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [60,0,124,0]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: psrlq $32, %xmm0
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: psllq $32, %xmm0
; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_60_120:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [60,124]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: psrlq $32, %xmm0
; X64-NEXT: pmuludq %xmm1, %xmm0
; X64-NEXT: psllq $32, %xmm0
; X64-NEXT: paddq %xmm2, %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v2i64_60_120:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [60,124]
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT: retq
  %mul = mul <2 x i64> %x, <i64 60, i64 124>
  ret <2 x i64> %mul
}