; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=CHECK,AVX,AVX-FAST

; fold (shl 0, x) -> 0
define <4 x i32> @combine_vec_shl_zero(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_zero:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shl <4 x i32> zeroinitializer, %x
  ret <4 x i32> %1
}

; fold (shl x, c >= size(x)) -> undef
define <4 x i32> @combine_vec_shl_outofrange0(<4 x i32> %x) {
; CHECK-LABEL: combine_vec_shl_outofrange0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = shl <4 x i32> %x, <i32 33, i32 33, i32 33, i32 33>
  ret <4 x i32> %1
}

define <4 x i32> @combine_vec_shl_outofrange1(<4 x i32> %x) {
; CHECK-LABEL: combine_vec_shl_outofrange1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = shl <4 x i32> %x, <i32 33, i32 34, i32 35, i32 36>
  ret <4 x i32> %1
}

; as outofrange0, but the shift amount survives an intervening 'and'
define <4 x i32> @combine_vec_shl_outofrange2(<4 x i32> %a0) {
; CHECK-LABEL: combine_vec_shl_outofrange2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %2 = shl <4 x i32> %1, <i32 33, i32 33, i32 33, i32 33>
  ret <4 x i32> %2
}

; fold (shl x, 0) -> x
define <4 x i32> @combine_vec_shl_by_zero(<4 x i32> %x) {
; CHECK-LABEL: combine_vec_shl_by_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = shl <4 x i32> %x, zeroinitializer
  ret <4 x i32> %1
}

; if (shl x, c) is known to be zero, return 0
define <4 x i32> @combine_vec_shl_known_zero0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_known_zero0:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_known_zero0:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = and <4 x i32> %x, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
  %2 = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_known_zero1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_known_zero1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65536,32768,16384,8192]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_known_zero1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_known_zero1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = and <4 x i32> %x, <i32 4294901760, i32 8589803520, i32 17179607040, i32 34359214080>
  %2 = shl <4 x i32> %1, <i32 16, i32 15, i32 14, i32 13>
  ret <4 x i32> %2
}

; fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
define <4 x i32> @combine_vec_shl_trunc_and(<4 x i32> %x, <4 x i64> %y) {
; SSE2-LABEL: combine_vec_shl_trunc_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pslld $23, %xmm1
; SSE2-NEXT:    paddd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_trunc_and:
; SSE41:       # %bb.0:
; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; SSE41-NEXT:    andps {{.*}}(%rip), %xmm1
; SSE41-NEXT:    pslld $23, %xmm1
; SSE41-NEXT:    paddd {{.*}}(%rip), %xmm1
; SSE41-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE41-NEXT:    pmulld %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-SLOW-LABEL: combine_vec_shl_trunc_and:
; AVX-SLOW:       # %bb.0:
; AVX-SLOW-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX-SLOW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX-SLOW-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT:    vzeroupper
; AVX-SLOW-NEXT:    retq
;
; AVX-FAST-LABEL: combine_vec_shl_trunc_and:
; AVX-FAST:       # %bb.0:
; AVX-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX-FAST-NEXT:    vpermd %ymm1, %ymm2, %ymm1
; AVX-FAST-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX-FAST-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; AVX-FAST-NEXT:    vzeroupper
; AVX-FAST-NEXT:    retq
  %1 = and <4 x i64> %y, <i64 15, i64 255, i64 4095, i64 65535>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shl <4 x i32> %x, %2
  ret <4 x i32> %3
}

; fold (shl (shl x, c1), c2) -> (shl x, (add c1, c2))
define <4 x i32> @combine_vec_shl_shl0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shl0:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $6, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_shl0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $6, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %2 = shl <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_shl1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_shl1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [16,64,256,1024]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_shl1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_shl1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shl <4 x i32> %x, <i32 0, i32 1, i32 2, i32 3>
  %2 = shl <4 x i32> %1, <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %2
}

; fold (shl (shl x, c1), c2) -> 0 when (add c1, c2) >= bitwidth
define <4 x i32> @combine_vec_shl_shlr_zero0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shlr_zero0:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_shlr_zero0:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shl <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %2 = shl <4 x i32> %1, <i32 20, i32 20, i32 20, i32 20>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_shl_zero1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_shl_zero1:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_shl_zero1:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shl <4 x i32> %x, <i32 17, i32 18, i32 19, i32 20>
  %2 = shl <4 x i32> %1, <i32 25, i32 26, i32 27, i32 28>
  ret <4 x i32> %2
}

; fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
define <8 x i32> @combine_vec_shl_ext_shl0(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_ext_shl0:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pslld $20, %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    pslld $20, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_ext_shl0:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE41-NEXT:    pslld $20, %xmm1
; SSE41-NEXT:    pslld $20, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_ext_shl0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    vpslld $20, %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = shl <8 x i16> %x, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  %2 = sext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <8 x i32> %3
}

define <8 x i32> @combine_vec_shl_ext_shl1(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_ext_shl1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    pslld $30, %xmm0
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pslld $29, %xmm2
; SSE2-NEXT:    pslld $28, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_ext_shl1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmullw {{.*}}(%rip), %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovsxwd %xmm1, %xmm1
; SSE41-NEXT:    pmovsxwd %xmm0, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pslld $30, %xmm2
; SSE41-NEXT:    pslld $31, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pslld $28, %xmm2
; SSE41-NEXT:    pslld $29, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_ext_shl1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = shl <8 x i16> %x, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  %2 = sext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 31, i32 31, i32 30, i32 30, i32 29, i32 29, i32 28, i32 28>
  ret <8 x i32> %3
}

; fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
define <8 x i32> @combine_vec_shl_zext_lshr0(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_zext_lshr0:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_zext_lshr0:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_zext_lshr0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    retq
  %1 = lshr <8 x i16> %x, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  %2 = zext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i32> %3
}

define <8 x i32> @combine_vec_shl_zext_lshr1(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_zext_lshr1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmulhuw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,16]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [32,64,128,256]
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_zext_lshr1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulhuw {{.*}}(%rip), %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_zext_lshr1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulhuw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = lshr <8 x i16> %x, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  %2 = zext <8 x i16> %1 to <8 x i32>
  %3 = shl <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  ret <8 x i32> %3
}

; fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
define <4 x i32> @combine_vec_shl_ge_ashr_extact0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ge_ashr_extact0:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_ge_ashr_extact0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr exact <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_ge_ashr_extact1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_ge_ashr_extact1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $8, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrad $5, %xmm2
; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $4, %xmm1
; SSE2-NEXT:    psrad $3, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32,64,128,256]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_ge_ashr_extact1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrad $8, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrad $4, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrad $5, %xmm1
; SSE41-NEXT:    psrad $3, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_ge_ashr_extact1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr exact <4 x i32> %x, <i32 3, i32 4, i32 5, i32 8>
  %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
  ret <4 x i32> %2
}

; fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C1-C2)) if C1 > C2
define <4 x i32> @combine_vec_shl_lt_ashr_extact0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_lt_ashr_extact0:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_lt_ashr_extact0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrad $2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr exact <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_lt_ashr_extact1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_lt_ashr_extact1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $8, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrad $7, %xmm2
; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $6, %xmm1
; SSE2-NEXT:    psrad $5, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [8,16,32,256]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_lt_ashr_extact1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrad $8, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrad $6, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrad $7, %xmm1
; SSE41-NEXT:    psrad $5, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_lt_ashr_extact1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr exact <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 3, i32 4, i32 5, i32 8>
  ret <4 x i32> %2
}

; fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) if C2 > C1
define <4 x i32> @combine_vec_shl_gt_lshr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_gt_lshr0:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $2, %xmm0
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_gt_lshr0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294967264,4294967264,4294967264,4294967264]
; AVX-NEXT:    vpslld $2, %xmm0, %xmm0
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_gt_lshr1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_gt_lshr1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $8, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrld $5, %xmm2
; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $4, %xmm1
; SSE2-NEXT:    psrld $3, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32,64,128,256]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_gt_lshr1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $8, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrld $4, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $5, %xmm1
; SSE41-NEXT:    psrld $3, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_gt_lshr1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = lshr <4 x i32> %x, <i32 3, i32 4, i32 5, i32 8>
  %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
  ret <4 x i32> %2
}

; fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2)), MASK) if C1 >= C2
define <4 x i32> @combine_vec_shl_le_lshr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_le_lshr0:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $2, %xmm0
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_le_lshr0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1073741816,1073741816,1073741816,1073741816]
; AVX-NEXT:    vpsrld $2, %xmm0, %xmm0
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = lshr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_le_lshr1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_le_lshr1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $8, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrld $7, %xmm2
; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $6, %xmm1
; SSE2-NEXT:    psrld $5, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [8,16,32,256]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_le_lshr1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $8, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrld $6, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $7, %xmm1
; SSE41-NEXT:    psrld $5, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_le_lshr1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = lshr <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 3, i32 4, i32 5, i32 8>
  ret <4 x i32> %2
}

; fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
define <4 x i32> @combine_vec_shl_ashr0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ashr0:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_ashr0:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [4294967264,4294967264,4294967264,4294967264]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_ashr1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_ashr1:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_ashr1:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 5, i32 6, i32 7, i32 8>
  ret <4 x i32> %2
}

; fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
define <4 x i32> @combine_vec_shl_add0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_add0:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $2, %xmm0
; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_add0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $2, %xmm0, %xmm0
; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = add <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_add1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_add1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,16]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_add1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_add1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = add <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %2
}

; fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
define <4 x i32> @combine_vec_shl_or0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_or0:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $2, %xmm0
; SSE-NEXT:    por {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_or0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $2, %xmm0, %xmm0
; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = or <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_or1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_or1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,16]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_or1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    por {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_or1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = or <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %2
}

; fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
define <4 x i32> @combine_vec_shl_mul0(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_mul0:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [20,20,20,20]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_mul0:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_mul0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
; AVX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = mul <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_shl_mul1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_mul1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [10,24,56,128]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: combine_vec_shl_mul1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: combine_vec_shl_mul1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = mul <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
  %2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %2
}