; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse41-builtins.c
; NOTE(review): The assertion lines in each function are machine-generated; do not
; hand-edit them -- regenerate with utils/update_llc_test_checks.py after any IR change.

define <2 x i64> @test_mm_blend_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_blend_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6,7]
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_blend_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6,7]
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %shuf = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
  %res = bitcast <8 x i16> %shuf to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_blend_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_blend_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_blend_pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    ret{{[l|q]}}
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %res
}

define <4 x float> @test_mm_blend_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_blend_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_blend_ps:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; AVX-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x float> %res
}

define <2 x i64> @test_mm_blendv_epi8(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; SSE-LABEL: test_mm_blendv_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3
; SSE-NEXT:    movdqa %xmm3, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_blendv_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %call = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %arg0, <16 x i8> %arg1, <16 x i8> %arg2)
  %res = bitcast <16 x i8> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

define <2 x double> @test_mm_blendv_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; SSE-LABEL: test_mm_blendv_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; SSE-NEXT:    movapd %xmm3, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_blendv_pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x float> @test_mm_blendv_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; SSE-LABEL: test_mm_blendv_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm3, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_blendv_ps:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_mm_ceil_pd(<2 x double> %a0) {
; SSE-LABEL: test_mm_ceil_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    roundpd $2, %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_ceil_pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundpd $2, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone

define <4 x float> @test_mm_ceil_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_ceil_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    roundps $2, %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_ceil_ps:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundps $2, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone

define <2 x double> @test_mm_ceil_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_ceil_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    roundsd $2, %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_ceil_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundsd $2, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone

define <4 x float> @test_mm_ceil_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_ceil_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    roundss $2, %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_ceil_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundss $2, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone

define <2 x i64> @test_mm_cmpeq_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_cmpeq_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqq %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpeq_epi64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpeq_epi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %xmm0
; AVX512-NEXT:    ret{{[l|q]}}
  %cmp = icmp eq <2 x i64> %a0, %a1
  %res = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepi8_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepi8_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovsxbw %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepi8_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %sext = sext <8 x i8> %ext0 to <8 x i16>
  %res = bitcast <8 x i16> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepi8_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepi8_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovsxbd %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepi8_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sext = sext <4 x i8> %ext0 to <4 x i32>
  %res = bitcast <4 x i32> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepi8_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepi8_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovsxbq %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepi8_epi64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbq %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %sext = sext <2 x i8> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x i64> @test_mm_cvtepi16_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepi16_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovsxwd %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepi16_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sext = sext <4 x i16> %ext0 to <4 x i32>
  %res = bitcast <4 x i32> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepi16_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepi16_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovsxwq %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepi16_epi64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxwq %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %sext = sext <2 x i16> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x i64> @test_mm_cvtepi32_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepi32_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovsxdq %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepi32_epi64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %sext = sext <2 x i32> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x i64> @test_mm_cvtepu8_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepu8_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepu8_epi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; NOTE(review): value name says "sext" but the operation is a zext; presumably
  ; kept to mirror the clang-generated IR naming -- confirm before renaming.
  %sext = zext <8 x i8> %ext0 to <8 x i16>
  %res = bitcast <8 x i16> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepu8_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepu8_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepu8_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sext = zext <4 x i8> %ext0 to <4 x i32>
  %res = bitcast <4 x i32> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepu8_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepu8_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepu8_epi64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %sext = zext <2 x i8> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x i64> @test_mm_cvtepu16_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepu16_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepu16_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sext = zext <4 x i16> %ext0 to <4 x i32>
  %res = bitcast <4 x i32> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepu16_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepu16_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepu16_epi64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %sext = zext <2 x i16> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x i64> @test_mm_cvtepu32_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepu32_epi64:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepu32_epi64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %sext = zext <2 x i32> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x double> @test_mm_dp_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_dp_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    dppd $7, %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_dp_pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @test_mm_dp_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_dp_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    dpps $7, %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_dp_ps:
; AVX:       # %bb.0:
; AVX-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone

define i32 @test_mm_extract_epi8(<2 x i64> %a0) {
; SSE-LABEL: test_mm_extract_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pextrb $1, %xmm0, %eax
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_extract_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrb $1, %xmm0, %eax
; AVX-NEXT:    movzbl %al, %eax
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext = extractelement <16 x i8> %arg0, i32 1
  %res = zext i8 %ext to i32
  ret i32 %res
}

define i32 @test_mm_extract_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_extract_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    extractps $1, %xmm0, %eax
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_extract_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractps $1, %xmm0, %eax
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext = extractelement <4 x i32> %arg0, i32 1
  ret i32 %ext
}

define i64 @test_mm_extract_epi64(<2 x i64> %a0) {
; X86-SSE-LABEL: test_mm_extract_epi64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    extractps $2, %xmm0, %eax
; X86-SSE-NEXT:    extractps $3, %xmm0, %edx
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test_mm_extract_epi64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vextractps $2, %xmm0, %eax
; X86-AVX-NEXT:    vextractps $3, %xmm0, %edx
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test_mm_extract_epi64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pextrq $1, %xmm0, %rax
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test_mm_extract_epi64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpextrq $1, %xmm0, %rax
; X64-AVX-NEXT:    retq
  ; NOTE(review): %arg0 is never used below; presumably kept to mirror the
  ; clang-generated IR for this builtin -- confirm before removing.
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext = extractelement <2 x i64> %a0, i32 1
  ret i64 %ext
}

define i32 @test_mm_extract_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_extract_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_extract_ps:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    ret{{[l|q]}}
  %ext = extractelement <4 x float> %a0, i32 1
  %bc = bitcast float %ext to i32
  ret i32 %bc
}

define <2 x double> @test_mm_floor_pd(<2 x double> %a0) {
; SSE-LABEL: test_mm_floor_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    roundpd $1, %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_floor_pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundpd $1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 1)
  ret <2 x double> %res
}

define <4 x float> @test_mm_floor_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_floor_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    roundps $1, %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_floor_ps:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundps $1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 1)
  ret <4 x float> %res
}

define <2 x double> @test_mm_floor_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_floor_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    roundsd $1, %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_floor_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundsd $1, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 1)
  ret <2 x double> %res
}

define <4 x float> @test_mm_floor_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_floor_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    roundss $1, %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_floor_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundss $1, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 1)
  ret <4 x float> %res
}

define <2 x i64> @test_mm_insert_epi8(<2 x i64> %a0, i8 %a1) {
; X86-SSE-LABEL: test_mm_insert_epi8:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    pinsrb $1, %eax, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test_mm_insert_epi8:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test_mm_insert_epi8:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movzbl %dil, %eax
; X64-SSE-NEXT:    pinsrb $1, %eax, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test_mm_insert_epi8:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movzbl %dil, %eax
; X64-AVX-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = insertelement <16 x i8> %arg0, i8 %a1,i32 1
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_insert_epi32(<2 x i64> %a0, i32 %a1) {
; X86-SSE-LABEL: test_mm_insert_epi32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pinsrd $1, {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test_mm_insert_epi32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test_mm_insert_epi32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pinsrd $1, %edi, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test_mm_insert_epi32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = insertelement <4 x i32> %arg0, i32 %a1,i32 1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_insert_epi64(<2 x i64> %a0, i64 %a1) {
; X86-SSE-LABEL: test_mm_insert_epi64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pinsrd $2, {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT:    pinsrd $3, {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test_mm_insert_epi64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test_mm_insert_epi64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pinsrq $1, %rdi, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test_mm_insert_epi64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %res = insertelement <2 x i64> %a0, i64 %a1,i32 1
  ret <2 x i64> %res
}

define <4 x float> @test_mm_insert_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_insert_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1],zero,xmm0[3]
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_insert_ps:
; AVX:       # %bb.0:
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[1],zero,xmm0[3]
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 4)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone

define <2 x i64> @test_mm_max_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_max_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaxsb %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_max_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg0, %arg1
  %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
  %bc = bitcast <16 x i8> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_max_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_max_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaxsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_max_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg0, %arg1
  %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
  %bc = bitcast <4 x i32> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_max_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_max_epu16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaxuw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_max_epu16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp ugt <8 x i16> %arg0, %arg1
  %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
  %bc = bitcast <8 x i16> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_max_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_max_epu32:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaxud %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_max_epu32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp ugt <4 x i32> %arg0, %arg1
  %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
  %bc = bitcast <4 x i32> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_min_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_min_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pminsb %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_min_epi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp slt <16 x i8> %arg0, %arg1
  %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
  %bc = bitcast <16 x i8> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_min_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_min_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pminsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_min_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp slt <4 x i32> %arg0, %arg1
  %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
  %bc = bitcast <4 x i32> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_min_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_min_epu16:
; SSE:       # %bb.0:
; SSE-NEXT:    pminuw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_min_epu16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp ult <8 x i16> %arg0, %arg1
  %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
  %bc = bitcast <8 x i16> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_min_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_min_epu32:
; SSE:       # %bb.0:
; SSE-NEXT:    pminud %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_min_epu32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp ult <4 x i32> %arg0, %arg1
  %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
  %bc = bitcast <4 x i32> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_minpos_epu16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_minpos_epu16:
; SSE:       # %bb.0:
; SSE-NEXT:    phminposuw %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_minpos_epu16:
; AVX:       # %bb.0:
; AVX-NEXT:    vphminposuw %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %arg0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_mpsadbw_epu8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mpsadbw_epu8:
; SSE:       # %bb.0:
; SSE-NEXT:    mpsadbw $1, %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_mpsadbw_epu8:
; AVX:       # %bb.0:
; AVX-NEXT:    vmpsadbw $1, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %arg0, <16 x i8> %arg1, i8 1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <2 x i64> @test_mm_mul_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mul_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    psllq $32, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; SSE-NEXT:    psllq $32, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; SSE-NEXT:    pmuldq %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_mul_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpsllq $32, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_mul_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512-NEXT:    vpsraq $32, %xmm0, %xmm0
; AVX512-NEXT:    vpsllq $32, %xmm1, %xmm1
; AVX512-NEXT:    vpsraq $32, %xmm1, %xmm1
; AVX512-NEXT:    vpmullq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    ret{{[l|q]}}
  %A = shl <2 x i64> %a0, <i64 32, i64 32>
  %A1 = ashr exact <2 x i64> %A, <i64 32, i64 32>
  %B = shl <2 x i64> %a1, <i64 32, i64 32>
  %B1 = ashr exact <2 x i64> %B, <i64 32, i64 32>
  %res = mul nsw <2 x i64> %A1, %B1
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_mullo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mullo_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pmulld %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_mullo_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = mul <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_packus_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_packus_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    packusdw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_packus_epi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x double> @test_mm_round_pd(<2 x double> %a0) {
; SSE-LABEL: test_mm_round_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    roundpd $4, %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_round_pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundpd $4, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 4)
  ret <2 x double> %res
}

define <4 x float> @test_mm_round_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_round_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    roundps $4, %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_round_ps:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundps $4, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 4)
  ret <4 x float> %res
}

define <2 x double> @test_mm_round_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_round_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    roundsd $4, %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_round_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundsd $4, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 4)
  ret <2 x double> %res
}

define <4 x float> @test_mm_round_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_round_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    roundss $4, %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_round_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundss $4, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 4)
  ret <4 x float> %res
}

define <2 x i64> @test_mm_stream_load_si128(<2 x i64>* %a0) {
; X86-SSE-LABEL: test_mm_stream_load_si128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movntdqa (%eax), %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: test_mm_stream_load_si128:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovntdqa (%eax), %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: test_mm_stream_load_si128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movntdqa (%rdi), %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: test_mm_stream_load_si128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovntdqa (%rdi), %xmm0
; X64-AVX-NEXT:    retq
  %arg0 = bitcast <2 x i64>* %a0 to i8*
  %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readnone

define i32 @test_mm_test_all_ones(<2 x i64> %a0) {
; SSE-LABEL: test_mm_test_all_ones:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    ptest %xmm1, %xmm0
; SSE-NEXT:    setb %al
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_test_all_ones:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vptest %xmm1, %xmm0
; AVX-NEXT:    setb %al
; AVX-NEXT:    ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> <i64 -1, i64 -1>)
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone

define i32 @test_mm_test_all_zeros(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_test_all_zeros:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    ptest %xmm1, %xmm0
; SSE-NEXT:    sete %al
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_test_all_zeros:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vptest %xmm1, %xmm0
; AVX-NEXT:    sete %al
; AVX-NEXT:    ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone

define i32 @test_mm_test_mix_ones_zeros(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_test_mix_ones_zeros:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    ptest %xmm1, %xmm0
; SSE-NEXT:    seta %al
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mm_test_mix_ones_zeros:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vptest %xmm1, %xmm0
; AVX-NEXT:    seta
%al 1047 ; AVX-NEXT: ret{{[l|q]}} 1048 %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) 1049 ret i32 %res 1050 } 1051 declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone 1052 1053 define i32 @test_mm_testc_si128(<2 x i64> %a0, <2 x i64> %a1) { 1054 ; SSE-LABEL: test_mm_testc_si128: 1055 ; SSE: # %bb.0: 1056 ; SSE-NEXT: xorl %eax, %eax 1057 ; SSE-NEXT: ptest %xmm1, %xmm0 1058 ; SSE-NEXT: setb %al 1059 ; SSE-NEXT: ret{{[l|q]}} 1060 ; 1061 ; AVX-LABEL: test_mm_testc_si128: 1062 ; AVX: # %bb.0: 1063 ; AVX-NEXT: xorl %eax, %eax 1064 ; AVX-NEXT: vptest %xmm1, %xmm0 1065 ; AVX-NEXT: setb %al 1066 ; AVX-NEXT: ret{{[l|q]}} 1067 %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) 1068 ret i32 %res 1069 } 1070 1071 define i32 @test_mm_testnzc_si128(<2 x i64> %a0, <2 x i64> %a1) { 1072 ; SSE-LABEL: test_mm_testnzc_si128: 1073 ; SSE: # %bb.0: 1074 ; SSE-NEXT: xorl %eax, %eax 1075 ; SSE-NEXT: ptest %xmm1, %xmm0 1076 ; SSE-NEXT: seta %al 1077 ; SSE-NEXT: ret{{[l|q]}} 1078 ; 1079 ; AVX-LABEL: test_mm_testnzc_si128: 1080 ; AVX: # %bb.0: 1081 ; AVX-NEXT: xorl %eax, %eax 1082 ; AVX-NEXT: vptest %xmm1, %xmm0 1083 ; AVX-NEXT: seta %al 1084 ; AVX-NEXT: ret{{[l|q]}} 1085 %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) 1086 ret i32 %res 1087 } 1088 1089 define i32 @test_mm_testz_si128(<2 x i64> %a0, <2 x i64> %a1) { 1090 ; SSE-LABEL: test_mm_testz_si128: 1091 ; SSE: # %bb.0: 1092 ; SSE-NEXT: xorl %eax, %eax 1093 ; SSE-NEXT: ptest %xmm1, %xmm0 1094 ; SSE-NEXT: sete %al 1095 ; SSE-NEXT: ret{{[l|q]}} 1096 ; 1097 ; AVX-LABEL: test_mm_testz_si128: 1098 ; AVX: # %bb.0: 1099 ; AVX-NEXT: xorl %eax, %eax 1100 ; AVX-NEXT: vptest %xmm1, %xmm0 1101 ; AVX-NEXT: sete %al 1102 ; AVX-NEXT: ret{{[l|q]}} 1103 %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) 1104 ret i32 %res 1105 } 1106