; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; Tests for SSE2 and below, without SSE3+.

; Replace the low double of a loaded <2 x double> with scalar %B (mask <2,1>).
define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
; X86-SSE-LABEL: test1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movapd (%ecx), %xmm0
; X86-SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; X86-SSE-NEXT: movapd %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test1:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: vmovapd (%ecx), %xmm0
; X86-AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; X86-AVX-NEXT: vmovapd %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test1:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movapd (%rsi), %xmm1
; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; X64-SSE-NEXT: movapd %xmm1, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test1:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
; X64-AVX-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %tmp3 = load <2 x double>, <2 x double>* %A, align 16
  %tmp7 = insertelement <2 x double> undef, double %B, i32 0
  %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
  store <2 x double> %tmp9, <2 x double>* %r, align 16
  ret void
}

; Replace the high double of a loaded <2 x double> with scalar %B (mask <0,2>).
define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
; X86-SSE-LABEL: test2:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movapd (%ecx), %xmm0
; X86-SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X86-SSE-NEXT: movapd %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test2:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: vmovapd (%ecx), %xmm0
; X86-AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X86-AVX-NEXT: vmovapd %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test2:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rsi), %xmm1
; X64-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT: movaps %xmm1, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test2:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps (%rsi), %xmm1
; X64-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %tmp3 = load <2 x double>, <2 x double>* %A, align 16
  %tmp7 = insertelement <2 x double> undef, double %B, i32 0
  %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
  store <2 x double> %tmp9, <2 x double>* %r, align 16
  ret void
}

; Interleave the low two floats of *%A and *%B via scalar extract/insert;
; should lower to a single unpcklps.
define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind {
; X86-SSE-LABEL: test3:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: movaps (%edx), %xmm0
; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test3:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: vmovaps (%edx), %xmm0
; X86-AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; X86-AVX-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test3:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rsi), %xmm0
; X64-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; X64-SSE-NEXT: movaps %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test3:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps (%rsi), %xmm0
; X64-AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; X64-AVX-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %tmp = load <4 x float>, <4 x float>* %B ; <<4 x float>> [#uses=2]
  %tmp3 = load <4 x float>, <4 x float>* %A ; <<4 x float>> [#uses=2]
  %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1]
  %tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1]
  %tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1]
  %tmp9 = extractelement <4 x float> %tmp, i32 1 ; <float> [#uses=1]
  %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0 ; <<4 x float>> [#uses=1]
  %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 ; <<4 x float>> [#uses=1]
  %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 ; <<4 x float>> [#uses=1]
  %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 ; <<4 x float>> [#uses=1]
  store <4 x float> %tmp13, <4 x float>* %res
  ret void
}

; Single-input shuffle with undef RHS lanes; lowers to one shufps/vpermilps.
define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
; X86-SSE-LABEL: test4:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test4:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; X86-AVX-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test4:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; X64-SSE-NEXT: movaps %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test4:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; X64-AVX-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1]
  store <4 x float> %tmp5, <4 x float>* %res
  ret void
}

; Scalar float load widened and expanded through i8/i16 interleaving shuffles.
define <4 x i32> @test5(i8** %ptr) nounwind {
; X86-SSE-LABEL: test5:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl (%eax), %eax
; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: pxor %xmm0, %xmm0
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test5:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl (%eax), %eax
; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test5:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq (%rdi), %rax
; X64-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-SSE-NEXT: pxor %xmm0, %xmm0
; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test5:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movq (%rdi), %rax
; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-AVX-NEXT: retq
  %tmp = load i8*, i8** %ptr ; <i8*> [#uses=1]
  %tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1]
  %tmp.upgrd.2 = load float, float* %tmp.upgrd.1 ; <float> [#uses=1]
  %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1]
  %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
  %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
  %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
  %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8> ; <<16 x i8>> [#uses=1]
  %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1]
  %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16> ; <<8 x i16>> [#uses=1]
  %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1]
  %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32> ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %tmp36
}

; Shuffle with only lane 0 taken from the input (rest undef) folds to a copy.
define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
; X86-SSE-LABEL: test6:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movaps (%ecx), %xmm0
; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test6:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: vmovaps (%ecx), %xmm0
; X86-AVX-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test6:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rsi), %xmm0
; X64-SSE-NEXT: movaps %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test6:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps (%rsi), %xmm0
; X64-AVX-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %tmp1 = load <4 x float>, <4 x float>* %A ; <<4 x float>> [#uses=1]
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
  store <4 x float> %tmp2, <4 x float>* %res
  ret void
}

; Zero-splat shuffle stored to null; folds to xorps + store.
define void @test7() nounwind {
; SSE-LABEL: test7:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movaps %xmm0, 0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test7:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovaps %xmm0, 0
; AVX-NEXT: ret{{[l|q]}}
  bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:1 [#uses=1]
  shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1]
  store <4 x float> %2, <4 x float>* null
  ret void
}

@x = external global [4 x i32]

; Four scalar loads from @x built into a vector; folds to one unaligned load.
define <2 x i64> @test8() nounwind {
; X86-SSE-LABEL: test8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movups x, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test8:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovups x, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test8:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups {{.*}}(%rip), %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test8:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovups {{.*}}(%rip), %xmm0
; X64-AVX-NEXT: retq
  %tmp = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1]
  %tmp3 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1]
  %tmp5 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1]
  %tmp7 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1]
  %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1]
  %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
  %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1]
  %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3 ; <<4 x i32>> [#uses=1]
  %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %tmp16
}

; Build <4 x float> from four scalar arguments (leading i32 dummy shifts the
; x86-32 stack layout so the floats are contiguous).
define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind {
; X86-SSE-LABEL: test9:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movups {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test9:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test9:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test9:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; X64-AVX-NEXT: retq
  %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
  %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
  %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
  %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
  ret <4 x float> %tmp13
}

; Same as test9 but without the leading dummy argument.
define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind {
; X86-SSE-LABEL: test10:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movups {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test10:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test10:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test10:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; X64-AVX-NEXT: retq
  %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
  %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
  %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
  %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
  ret <4 x float> %tmp13
}

; Build <2 x double> from two scalar arguments; lowers to movlhps on x86-64.
define <2 x double> @test11(double %a, double %b) nounwind {
; X86-SSE-LABEL: test11:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movups {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test11:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test11:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test11:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-AVX-NEXT: retq
  %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1]
  %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1]
  ret <2 x double> %tmp7
}

; Blend with a constant vector plus a high-half/zero shuffle, added together.
define void @test12() nounwind {
; SSE-LABEL: test12:
; SSE: # %bb.0:
; SSE-NEXT: movapd 0, %xmm0
; SSE-NEXT: movapd {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE-NEXT: xorps %xmm2, %xmm2
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE-NEXT: addps %xmm1, %xmm2
; SSE-NEXT: movaps %xmm2, 0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test12:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps 0, %xmm0
; AVX1-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0,1],mem[2,3]
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmovaps %xmm0, 0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test12:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps 0, %xmm0
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3]
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
; AVX512-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vmovaps %xmm0, 0
; AVX512-NEXT: ret{{[l|q]}}
  %tmp1 = load <4 x float>, <4 x float>* null ; <<4 x float>> [#uses=2]
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
  %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1]
  store <4 x float> %tmp4, <4 x float>* null
  ret void
}

; Two-input shuffle <1,4,1,5> of two loaded vectors.
define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
; X86-SSE-LABEL: test13:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: movaps (%edx), %xmm0
; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],mem[0,1]
; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test13:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: vmovaps (%edx), %xmm0
; X86-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],mem[0,1]
; X86-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; X86-AVX-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test13:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdx), %xmm0
; X64-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],mem[0,1]
; X64-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; X64-SSE-NEXT: movaps %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test13:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps (%rdx), %xmm0
; X64-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],mem[0,1]
; X64-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; X64-AVX-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %tmp3 = load <4 x float>, <4 x float>* %B ; <<4 x float>> [#uses=1]
  %tmp5 = load <4 x float>, <4 x float>* %C ; <<4 x float>> [#uses=1]
  %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
  store <4 x float> %tmp11, <4 x float>* %res
  ret void
}

; Concatenate low halves of a vector add and a vector sub (mask <0,1,4,5>).
define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
; X86-SSE-LABEL: test14:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movaps (%ecx), %xmm1
; X86-SSE-NEXT: movaps (%eax), %xmm2
; X86-SSE-NEXT: movaps %xmm2, %xmm0
; X86-SSE-NEXT: addps %xmm1, %xmm0
; X86-SSE-NEXT: subps %xmm1, %xmm2
; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test14:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: vmovaps (%ecx), %xmm0
; X86-AVX-NEXT: vmovaps (%eax), %xmm1
; X86-AVX-NEXT: vaddps %xmm0, %xmm1, %xmm2
; X86-AVX-NEXT: vsubps %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test14:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rsi), %xmm1
; X64-SSE-NEXT: movaps (%rdi), %xmm2
; X64-SSE-NEXT: movaps %xmm2, %xmm0
; X64-SSE-NEXT: addps %xmm1, %xmm0
; X64-SSE-NEXT: subps %xmm1, %xmm2
; X64-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test14:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps (%rsi), %xmm0
; X64-AVX-NEXT: vmovaps (%rdi), %xmm1
; X64-AVX-NEXT: vaddps %xmm0, %xmm1, %xmm2
; X64-AVX-NEXT: vsubps %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; X64-AVX-NEXT: retq
  %tmp = load <4 x float>, <4 x float>* %y ; <<4 x float>> [#uses=2]
  %tmp5 = load <4 x float>, <4 x float>* %x ; <<4 x float>> [#uses=2]
  %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
  %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
  %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
  ret <4 x float> %tmp27
}

; Concatenate high halves of two loaded vectors (mask <2,3,6,7>) -> unpckhpd.
define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
; X86-SSE-LABEL: test15:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movaps (%ecx), %xmm0
; X86-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test15:
; X86-AVX: # %bb.0: # %entry
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: vmovaps (%ecx), %xmm0
; X86-AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test15:
; X64-SSE: # %bb.0: # %entry
; X64-SSE-NEXT: movaps (%rdi), %xmm0
; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test15:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
; X64-AVX-NEXT: retq
entry:
  %tmp = load <4 x float>, <4 x float>* %y ; <<4 x float>> [#uses=1]
  %tmp3 = load <4 x float>, <4 x float>* %x ; <<4 x float>> [#uses=1]
  %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
  ret <4 x float> %tmp4
}

; PR8900

; Extract even doubles of a loaded <4 x double>.
define <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocapture %dst) {
; X86-SSE-LABEL: test16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movaps 96(%eax), %xmm0
; X86-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test16:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovaps 96(%eax), %ymm0
; X86-AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-AVX-NEXT: vzeroupper
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test16:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps 96(%rdi), %xmm0
; X64-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test16:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps 96(%rdi), %ymm0
; X64-AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-AVX-NEXT: vzeroupper
; X64-AVX-NEXT: retq
  %i5 = getelementptr inbounds <4 x double>, <4 x double>* %srcA, i32 3
  %i6 = load <4 x double>, <4 x double>* %i5, align 32
  %i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %i7
}

; PR9009
define fastcc void @test17() nounwind {
; X86-SSE-LABEL: test17:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: movaps {{.*#+}} xmm0 = <u,u,32768,32768>
; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: test17:
; X86-AVX1: # %bb.0: # %entry
; X86-AVX1-NEXT: vmovaps {{.*#+}} xmm0 = <u,u,32768,32768>
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX1-NEXT: retl
;
; X86-AVX512-LABEL: test17:
; X86-AVX512: # %bb.0: # %entry
; X86-AVX512-NEXT: vbroadcastss {{.*#+}} xmm0 = [32768,32768,32768,32768]
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX512-NEXT: retl
;
; X64-SSE-LABEL: test17:
; X64-SSE: # %bb.0: # %entry
; X64-SSE-NEXT: movaps {{.*#+}} xmm0 = <u,u,32768,32768>
; X64-SSE-NEXT: movaps %xmm0, (%rax)
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: test17:
; X64-AVX1: # %bb.0: # %entry
; X64-AVX1-NEXT: vmovaps {{.*#+}} xmm0 = <u,u,32768,32768>
; X64-AVX1-NEXT: vmovaps %xmm0, (%rax)
; X64-AVX1-NEXT: retq
;
; X64-AVX512-LABEL: test17:
; X64-AVX512: # %bb.0: # %entry
; X64-AVX512-NEXT: vbroadcastss {{.*#+}} xmm0 = [32768,32768,32768,32768]
; X64-AVX512-NEXT: vmovaps %xmm0, (%rax)
; X64-AVX512-NEXT: retq
entry:
  %0 = insertelement <4 x i32> undef, i32 undef, i32 1
  %1 = shufflevector <4 x i32> <i32 undef, i32 undef, i32 32768, i32 32768>, <4 x i32> %0, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  %2 = bitcast <4 x i32> %1 to <4 x float>
  store <4 x float> %2, <4 x float> * undef
  ret void
}

; PR9210
define <4 x float> @f(<4 x double>) nounwind {
; SSE-LABEL: f:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtpd2ps %xmm1, %xmm1
; SSE-NEXT: cvtpd2ps %xmm0, %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: f:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtpd2ps %ymm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: ret{{[l|q]}}
entry:
  %double2float.i = fptrunc <4 x double> %0 to <4 x float>
  ret <4 x float> %double2float.i
}

; Shuffle with a zero vector zeroing the upper qword -> movq.
define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
; SSE-LABEL: test_insert_64_zext:
; SSE: # %bb.0:
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_insert_64_zext:
; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: ret{{[l|q]}}
  %1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %1
}

; i128 bitcast + mask that clears the low 32 bits of the vector.
define <4 x i32> @PR19721(<4 x i32> %i) {
; X86-SSE-LABEL: PR19721:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: andps {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: PR19721:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: PR19721:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq %xmm0, %rax
; X64-SSE-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-SSE-NEXT: andq %rax, %rcx
; X64-SSE-NEXT: movq %rcx, %xmm1
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: PR19721:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-AVX1-NEXT: andq %rax, %rcx
; X64-AVX1-NEXT: vmovq %rcx, %xmm1
; X64-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; X64-AVX1-NEXT: retq
;
; X64-AVX512-LABEL: PR19721:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-AVX512-NEXT: andq %rax, %rcx
; X64-AVX512-NEXT: vmovq %rcx, %xmm1
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX512-NEXT: retq
  %bc = bitcast <4 x i32> %i to i128
  %insert = and i128 %bc, -4294967296
  %bc2 = bitcast i128 %insert to <4 x i32>
  ret <4 x i32> %bc2
}

; <4 x i32> multiply: pmuludq expansion on SSE2, single pmulld with AVX.
define <4 x i32> @test_mul(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: test_mul:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE-NEXT: pmuludq %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE-NEXT: pmuludq %xmm2, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mul:
; AVX: # %bb.0:
; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %m = mul <4 x i32> %x, %y
  ret <4 x i32> %m
}