; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx,aes,pclmul | FileCheck %s

; Codegen checks for 128-bit x86 intrinsics compiled with AVX enabled. Each
; test is built around one intrinsic call and expects the v-prefixed AVX form
; of the corresponding instruction in the llc output.
; NOTE(review): the expectations use 32-bit forms (`retl`), presumably because
; -march=x86 on the llc command line overrides the x86_64 triple -- confirm.

; --- AES-NI intrinsics ------------------------------------------------------

define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesdec:
; CHECK: # BB#0:
; CHECK-NEXT: vaesdec %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesdeclast:
; CHECK: # BB#0:
; CHECK-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesenc:
; CHECK: # BB#0:
; CHECK-NEXT: vaesenc %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesenclast:
; CHECK: # BB#0:
; CHECK-NEXT: vaesenclast %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_aesni_aesimc:
; CHECK: # BB#0:
; CHECK-NEXT: vaesimc %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone


; The i8 argument (7) is expected to show up as the $7 immediate operand.
define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_aesni_aeskeygenassist:
; CHECK: # BB#0:
; CHECK-NEXT: vaeskeygenassist $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone


; --- SSE2 intrinsics: scalar add and compares --------------------------------

define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_add_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


; Predicate immediate 7 is expected to be printed as the "ord" mnemonic
; (vcmpordpd) rather than as a raw immediate.
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_pd:
; CHECK: # BB#0:
; CHECK-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone


; Scalar counterpart of the test above: immediate 7 -> vcmpordsd.
define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone


; --- SSE2 ordered scalar compares (comi*) -----------------------------------
; Each test expects a vcomisd followed by materialisation of the relevant
; flag(s) into %eax as a 0/1 i32 result.

define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comieq_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comige_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comigt_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comile_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: setbe %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone


; Unlike its siblings, the less-than result is expected to be produced from
; the carry flag with sbbl/andl instead of a setcc/movzbl pair.
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comilt_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comineq_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone


; --- SSE2 conversions -------------------------------------------------------

define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2dq:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2pd:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone


define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsd2si:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtsd2si %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone


; The constant i32 operand (7) is expected to be loaded into %eax first and
; then converted from that register.
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
; CHECK: # BB#0:
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtss2sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
; CHECK: # BB#0:
; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttps2dq:
; CHECK: # BB#0:
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone


define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttsd2si:
; CHECK: # BB#0:
; CHECK-NEXT: vcvttsd2si %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone


; --- SSE2 floating-point div / max / min / movmsk / mul ----------------------

define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_div_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone



define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_pd:
; CHECK: # BB#0:
; CHECK-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_pd:
; CHECK: # BB#0:
; CHECK-NEXT: vminpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_movmsk_pd:
; CHECK: # BB#0:
; CHECK-NEXT: vmovmskpd %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone




define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_mul_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


; --- SSE2 integer pack / saturating add / average / multiply / min-max -------

define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_packssdw_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone


define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packsswb_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packuswb_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_b:
; CHECK: # BB#0:
; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_b:
; CHECK: # BB#0:
; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_b:
; CHECK: # BB#0:
; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmadd_wd:
; CHECK: # BB#0:
; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxs_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxu_b:
; CHECK: # BB#0:
; CHECK-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmins_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pminu_b:
; CHECK: # BB#0:
; CHECK-NEXT: vpminub %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone


define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpmovmskb %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulh_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulhu_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulu_dq:
; CHECK: # BB#0:
; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psad_bw:
; CHECK: # BB#0:
; CHECK-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


; --- SSE2 shifts -------------------------------------------------------------
; psll/psra/psrl take the count in a vector register (%xmm1); the pslli/psrai/
; psrli variants pass i32 7 and expect it as the $7 immediate operand.

define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_d:
; CHECK: # BB#0:
; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_q:
; CHECK: # BB#0:
; CHECK-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_d:
; CHECK: # BB#0:
; CHECK-NEXT: vpslld $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_q:
; CHECK: # BB#0:
; CHECK-NEXT: vpsllq $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_d:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_d:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrad $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsraw $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_d:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_q:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_d:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrld $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_q:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrlq $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrlw $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone


; --- SSE2 saturating subtract ------------------------------------------------

define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_b:
; CHECK: # BB#0:
; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_b:
; CHECK: # BB#0:
; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone


; --- SSE2 square root --------------------------------------------------------

define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_pd:
; CHECK: # BB#0:
; CHECK-NEXT: vsqrtpd %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


; The scalar form is expected in its three-operand AVX encoding, with %xmm0
; used for both sources.
define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone


; --- SSE2 stores -------------------------------------------------------------
; The pointer argument is expected to be loaded from the stack into %eax; the
; stack offset is matched with a regex rather than a fixed number.

define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_storel_dq:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovq %xmm0, (%eax)
; CHECK-NEXT: retl
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind


define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; add operation forces the execution domain.
  ; The constant-pool label of the splat-of-1 operand is matched with a regex:
  ; its number is this function's ordinal position in the file, so a literal
  ; label would break whenever a test is added or removed above this one.
; CHECK-LABEL: test_x86_sse2_storeu_dq:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vpaddb LCPI{{[0-9]+}}_0, %xmm0, %xmm0
; CHECK-NEXT: vmovdqu %xmm0, (%eax)
; CHECK-NEXT: retl
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind


define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; fadd operation forces the execution domain.
  ; The <0.0, 0x4200000000000000> addend is expected to be built by loading one
  ; double (vmovsd) and shifting it into the upper lane (vpslldq).
; CHECK-LABEL: test_x86_sse2_storeu_pd:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovupd %xmm0, (%eax)
; CHECK-NEXT: retl
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind


define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_sub_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


; --- SSE2 unordered scalar compares (ucomi*) ---------------------------------
; Each test expects a vucomisd followed by materialisation of the relevant
; flag(s) into %eax as a 0/1 i32 result.

define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomige_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomile_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: setbe %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone


; Unlike its siblings, the less-than result is expected to be produced from
; the carry flag with sbbl/andl instead of a setcc/movzbl pair.
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone


; --- SSE3 intrinsics ---------------------------------------------------------

define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse3_addsub_pd:
; CHECK: # BB#0:
; CHECK-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse3_addsub_ps:
; CHECK: # BB#0:
; CHECK-NEXT: vaddsubps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse3_hadd_pd:
; CHECK: # BB#0:
; CHECK-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare
<2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone 1019 1020 1021 define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) { 1022 ; CHECK-LABEL: test_x86_sse3_hadd_ps: 1023 ; CHECK: # BB#0: 1024 ; CHECK-NEXT: vhaddps %xmm1, %xmm0, %xmm0 1025 ; CHECK-NEXT: retl 1026 %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1027 ret <4 x float> %res 1028 } 1029 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone 1030 1031 1032 define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) { 1033 ; CHECK-LABEL: test_x86_sse3_hsub_pd: 1034 ; CHECK: # BB#0: 1035 ; CHECK-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 1036 ; CHECK-NEXT: retl 1037 %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 1038 ret <2 x double> %res 1039 } 1040 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone 1041 1042 1043 define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) { 1044 ; CHECK-LABEL: test_x86_sse3_hsub_ps: 1045 ; CHECK: # BB#0: 1046 ; CHECK-NEXT: vhsubps %xmm1, %xmm0, %xmm0 1047 ; CHECK-NEXT: retl 1048 %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1049 ret <4 x float> %res 1050 } 1051 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone 1052 1053 1054 define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) { 1055 ; CHECK-LABEL: test_x86_sse3_ldu_dq: 1056 ; CHECK: # BB#0: 1057 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1058 ; CHECK-NEXT: vlddqu (%eax), %xmm0 1059 ; CHECK-NEXT: retl 1060 %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1] 1061 ret <16 x i8> %res 1062 } 1063 declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly 1064 1065 1066 define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x 
double> %a1, <2 x double> %a2) { 1067 ; CHECK-LABEL: test_x86_sse41_blendvpd: 1068 ; CHECK: # BB#0: 1069 ; CHECK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 1070 ; CHECK-NEXT: retl 1071 %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1] 1072 ret <2 x double> %res 1073 } 1074 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone 1075 1076 1077 define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 1078 ; CHECK-LABEL: test_x86_sse41_blendvps: 1079 ; CHECK: # BB#0: 1080 ; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 1081 ; CHECK-NEXT: retl 1082 %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1] 1083 ret <4 x float> %res 1084 } 1085 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone 1086 1087 1088 define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) { 1089 ; CHECK-LABEL: test_x86_sse41_dppd: 1090 ; CHECK: # BB#0: 1091 ; CHECK-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 1092 ; CHECK-NEXT: retl 1093 %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] 1094 ret <2 x double> %res 1095 } 1096 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone 1097 1098 1099 define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) { 1100 ; CHECK-LABEL: test_x86_sse41_dpps: 1101 ; CHECK: # BB#0: 1102 ; CHECK-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 1103 ; CHECK-NEXT: retl 1104 %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] 1105 ret <4 x float> %res 1106 } 1107 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone 1108 1109 1110 define <4 x float> 
@test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) { 1111 ; CHECK-LABEL: test_x86_sse41_insertps: 1112 ; CHECK: # BB#0: 1113 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3] 1114 ; CHECK-NEXT: retl 1115 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] 1116 ret <4 x float> %res 1117 } 1118 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone 1119 1120 1121 1122 define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) { 1123 ; CHECK-LABEL: test_x86_sse41_mpsadbw: 1124 ; CHECK: # BB#0: 1125 ; CHECK-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 1126 ; CHECK-NEXT: retl 1127 %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1] 1128 ret <8 x i16> %res 1129 } 1130 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone 1131 1132 1133 define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) { 1134 ; CHECK-LABEL: test_x86_sse41_packusdw: 1135 ; CHECK: # BB#0: 1136 ; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 1137 ; CHECK-NEXT: retl 1138 %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] 1139 ret <8 x i16> %res 1140 } 1141 declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone 1142 1143 1144 define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { 1145 ; CHECK-LABEL: test_x86_sse41_pblendvb: 1146 ; CHECK: # BB#0: 1147 ; CHECK-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 1148 ; CHECK-NEXT: retl 1149 %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1] 1150 ret <16 x i8> %res 1151 } 1152 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone 1153 1154 1155 define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) { 1156 ; CHECK-LABEL: 
test_x86_sse41_phminposuw: 1157 ; CHECK: # BB#0: 1158 ; CHECK-NEXT: vphminposuw %xmm0, %xmm0 1159 ; CHECK-NEXT: retl 1160 %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] 1161 ret <8 x i16> %res 1162 } 1163 declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone 1164 1165 1166 define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) { 1167 ; CHECK-LABEL: test_x86_sse41_pmaxsb: 1168 ; CHECK: # BB#0: 1169 ; CHECK-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1170 ; CHECK-NEXT: retl 1171 %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 1172 ret <16 x i8> %res 1173 } 1174 declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone 1175 1176 1177 define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) { 1178 ; CHECK-LABEL: test_x86_sse41_pmaxsd: 1179 ; CHECK: # BB#0: 1180 ; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1181 ; CHECK-NEXT: retl 1182 %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1183 ret <4 x i32> %res 1184 } 1185 declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone 1186 1187 1188 define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) { 1189 ; CHECK-LABEL: test_x86_sse41_pmaxud: 1190 ; CHECK: # BB#0: 1191 ; CHECK-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1192 ; CHECK-NEXT: retl 1193 %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1194 ret <4 x i32> %res 1195 } 1196 declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone 1197 1198 1199 define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) { 1200 ; CHECK-LABEL: test_x86_sse41_pmaxuw: 1201 ; CHECK: # BB#0: 1202 ; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 1203 ; CHECK-NEXT: retl 1204 %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1205 ret <8 x i16> 
%res 1206 } 1207 declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone 1208 1209 1210 define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) { 1211 ; CHECK-LABEL: test_x86_sse41_pminsb: 1212 ; CHECK: # BB#0: 1213 ; CHECK-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1214 ; CHECK-NEXT: retl 1215 %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 1216 ret <16 x i8> %res 1217 } 1218 declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone 1219 1220 1221 define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) { 1222 ; CHECK-LABEL: test_x86_sse41_pminsd: 1223 ; CHECK: # BB#0: 1224 ; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1225 ; CHECK-NEXT: retl 1226 %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1227 ret <4 x i32> %res 1228 } 1229 declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone 1230 1231 1232 define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) { 1233 ; CHECK-LABEL: test_x86_sse41_pminud: 1234 ; CHECK: # BB#0: 1235 ; CHECK-NEXT: vpminud %xmm1, %xmm0, %xmm0 1236 ; CHECK-NEXT: retl 1237 %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1238 ret <4 x i32> %res 1239 } 1240 declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone 1241 1242 1243 define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) { 1244 ; CHECK-LABEL: test_x86_sse41_pminuw: 1245 ; CHECK: # BB#0: 1246 ; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm0 1247 ; CHECK-NEXT: retl 1248 %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1249 ret <8 x i16> %res 1250 } 1251 declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone 1252 1253 1254 define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) { 1255 ; CHECK-LABEL: test_x86_sse41_pmovsxbd: 1256 ; 
CHECK: # BB#0: 1257 ; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0 1258 ; CHECK-NEXT: retl 1259 %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] 1260 ret <4 x i32> %res 1261 } 1262 declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone 1263 1264 1265 define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) { 1266 ; CHECK-LABEL: test_x86_sse41_pmovsxbq: 1267 ; CHECK: # BB#0: 1268 ; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0 1269 ; CHECK-NEXT: retl 1270 %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] 1271 ret <2 x i64> %res 1272 } 1273 declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone 1274 1275 1276 define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) { 1277 ; CHECK-LABEL: test_x86_sse41_pmovsxbw: 1278 ; CHECK: # BB#0: 1279 ; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0 1280 ; CHECK-NEXT: retl 1281 %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] 1282 ret <8 x i16> %res 1283 } 1284 declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone 1285 1286 1287 define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) { 1288 ; CHECK-LABEL: test_x86_sse41_pmovsxdq: 1289 ; CHECK: # BB#0: 1290 ; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0 1291 ; CHECK-NEXT: retl 1292 %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] 1293 ret <2 x i64> %res 1294 } 1295 declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone 1296 1297 1298 define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) { 1299 ; CHECK-LABEL: test_x86_sse41_pmovsxwd: 1300 ; CHECK: # BB#0: 1301 ; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0 1302 ; CHECK-NEXT: retl 1303 %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] 1304 ret <4 x i32> %res 1305 } 1306 declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone 1307 1308 1309 define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) { 1310 ; CHECK-LABEL: 
test_x86_sse41_pmovsxwq: 1311 ; CHECK: # BB#0: 1312 ; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0 1313 ; CHECK-NEXT: retl 1314 %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] 1315 ret <2 x i64> %res 1316 } 1317 declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone 1318 1319 1320 define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { 1321 ; CHECK-LABEL: test_x86_sse41_pmovzxbd: 1322 ; CHECK: # BB#0: 1323 ; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1324 ; CHECK-NEXT: retl 1325 %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] 1326 ret <4 x i32> %res 1327 } 1328 declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone 1329 1330 1331 define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { 1332 ; CHECK-LABEL: test_x86_sse41_pmovzxbq: 1333 ; CHECK: # BB#0: 1334 ; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1335 ; CHECK-NEXT: retl 1336 %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] 1337 ret <2 x i64> %res 1338 } 1339 declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone 1340 1341 1342 define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { 1343 ; CHECK-LABEL: test_x86_sse41_pmovzxbw: 1344 ; CHECK: # BB#0: 1345 ; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1346 ; CHECK-NEXT: retl 1347 %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] 1348 ret <8 x i16> %res 1349 } 1350 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone 1351 1352 1353 define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { 1354 ; CHECK-LABEL: test_x86_sse41_pmovzxdq: 1355 ; CHECK: # BB#0: 1356 ; CHECK-NEXT: vpmovzxdq {{.*#+}} 
xmm0 = xmm0[0],zero,xmm0[1],zero 1357 ; CHECK-NEXT: retl 1358 %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] 1359 ret <2 x i64> %res 1360 } 1361 declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone 1362 1363 1364 define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { 1365 ; CHECK-LABEL: test_x86_sse41_pmovzxwd: 1366 ; CHECK: # BB#0: 1367 ; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1368 ; CHECK-NEXT: retl 1369 %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] 1370 ret <4 x i32> %res 1371 } 1372 declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone 1373 1374 1375 define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) { 1376 ; CHECK-LABEL: test_x86_sse41_pmovzxwq: 1377 ; CHECK: # BB#0: 1378 ; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1379 ; CHECK-NEXT: retl 1380 %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] 1381 ret <2 x i64> %res 1382 } 1383 declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone 1384 1385 1386 define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { 1387 ; CHECK-LABEL: test_x86_sse41_pmuldq: 1388 ; CHECK: # BB#0: 1389 ; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 1390 ; CHECK-NEXT: retl 1391 %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] 1392 ret <2 x i64> %res 1393 } 1394 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone 1395 1396 1397 define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) { 1398 ; CHECK-LABEL: test_x86_sse41_ptestc: 1399 ; CHECK: # BB#0: 1400 ; CHECK-NEXT: vptest %xmm1, %xmm0 1401 ; CHECK-NEXT: sbbl %eax, %eax 1402 ; CHECK-NEXT: andl $1, %eax 1403 ; CHECK-NEXT: retl 1404 %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] 1405 ret i32 
%res 1406 } 1407 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone 1408 1409 1410 define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) { 1411 ; CHECK-LABEL: test_x86_sse41_ptestnzc: 1412 ; CHECK: # BB#0: 1413 ; CHECK-NEXT: vptest %xmm1, %xmm0 1414 ; CHECK-NEXT: seta %al 1415 ; CHECK-NEXT: movzbl %al, %eax 1416 ; CHECK-NEXT: retl 1417 %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] 1418 ret i32 %res 1419 } 1420 declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone 1421 1422 1423 define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) { 1424 ; CHECK-LABEL: test_x86_sse41_ptestz: 1425 ; CHECK: # BB#0: 1426 ; CHECK-NEXT: vptest %xmm1, %xmm0 1427 ; CHECK-NEXT: sete %al 1428 ; CHECK-NEXT: movzbl %al, %eax 1429 ; CHECK-NEXT: retl 1430 %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] 1431 ret i32 %res 1432 } 1433 declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone 1434 1435 1436 define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) { 1437 ; CHECK-LABEL: test_x86_sse41_round_pd: 1438 ; CHECK: # BB#0: 1439 ; CHECK-NEXT: vroundpd $7, %xmm0, %xmm0 1440 ; CHECK-NEXT: retl 1441 %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] 1442 ret <2 x double> %res 1443 } 1444 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone 1445 1446 1447 define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) { 1448 ; CHECK-LABEL: test_x86_sse41_round_ps: 1449 ; CHECK: # BB#0: 1450 ; CHECK-NEXT: vroundps $7, %xmm0, %xmm0 1451 ; CHECK-NEXT: retl 1452 %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] 1453 ret <4 x float> %res 1454 } 1455 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone 1456 1457 1458 define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, 
<2 x double> %a1) { 1459 ; CHECK-LABEL: test_x86_sse41_round_sd: 1460 ; CHECK: # BB#0: 1461 ; CHECK-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm0 1462 ; CHECK-NEXT: retl 1463 %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] 1464 ret <2 x double> %res 1465 } 1466 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone 1467 1468 1469 define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) { 1470 ; CHECK-LABEL: test_x86_sse41_round_ss: 1471 ; CHECK: # BB#0: 1472 ; CHECK-NEXT: vroundss $7, %xmm1, %xmm0, %xmm0 1473 ; CHECK-NEXT: retl 1474 %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] 1475 ret <4 x float> %res 1476 } 1477 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone 1478 1479 1480 define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) { 1481 ; CHECK-LABEL: test_x86_sse42_pcmpestri128: 1482 ; CHECK: # BB#0: 1483 ; CHECK-NEXT: movl $7, %eax 1484 ; CHECK-NEXT: movl $7, %edx 1485 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0 1486 ; CHECK-NEXT: movl %ecx, %eax 1487 ; CHECK-NEXT: retl 1488 %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1489 ret i32 %res 1490 } 1491 declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1492 1493 1494 define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) { 1495 ; CHECK-LABEL: test_x86_sse42_pcmpestri128_load: 1496 ; CHECK: # BB#0: 1497 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx 1498 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1499 ; CHECK-NEXT: vmovdqa (%eax), %xmm0 1500 ; CHECK-NEXT: movl $7, %eax 1501 ; CHECK-NEXT: movl $7, %edx 1502 ; CHECK-NEXT: vpcmpestri $7, (%ecx), %xmm0 1503 ; CHECK-NEXT: movl %ecx, %eax 1504 ; CHECK-NEXT: retl 1505 %1 = load <16 x i8>, <16 x 
i8>* %a0 1506 %2 = load <16 x i8>, <16 x i8>* %a2 1507 %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1] 1508 ret i32 %res 1509 } 1510 1511 1512 define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { 1513 ; CHECK-LABEL: test_x86_sse42_pcmpestria128: 1514 ; CHECK: # BB#0: 1515 ; CHECK-NEXT: movl $7, %eax 1516 ; CHECK-NEXT: movl $7, %edx 1517 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0 1518 ; CHECK-NEXT: seta %al 1519 ; CHECK-NEXT: movzbl %al, %eax 1520 ; CHECK-NEXT: retl 1521 %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1522 ret i32 %res 1523 } 1524 declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1525 1526 1527 define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) { 1528 ; CHECK-LABEL: test_x86_sse42_pcmpestric128: 1529 ; CHECK: # BB#0: 1530 ; CHECK-NEXT: movl $7, %eax 1531 ; CHECK-NEXT: movl $7, %edx 1532 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0 1533 ; CHECK-NEXT: sbbl %eax, %eax 1534 ; CHECK-NEXT: andl $1, %eax 1535 ; CHECK-NEXT: retl 1536 %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1537 ret i32 %res 1538 } 1539 declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1540 1541 1542 define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) { 1543 ; CHECK-LABEL: test_x86_sse42_pcmpestrio128: 1544 ; CHECK: # BB#0: 1545 ; CHECK-NEXT: movl $7, %eax 1546 ; CHECK-NEXT: movl $7, %edx 1547 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0 1548 ; CHECK-NEXT: seto %al 1549 ; CHECK-NEXT: movzbl %al, %eax 1550 ; CHECK-NEXT: retl 1551 %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1552 ret i32 %res 1553 } 1554 declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) 
nounwind readnone 1555 1556 1557 define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) { 1558 ; CHECK-LABEL: test_x86_sse42_pcmpestris128: 1559 ; CHECK: # BB#0: 1560 ; CHECK-NEXT: movl $7, %eax 1561 ; CHECK-NEXT: movl $7, %edx 1562 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0 1563 ; CHECK-NEXT: sets %al 1564 ; CHECK-NEXT: movzbl %al, %eax 1565 ; CHECK-NEXT: retl 1566 %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1567 ret i32 %res 1568 } 1569 declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1570 1571 1572 define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) { 1573 ; CHECK-LABEL: test_x86_sse42_pcmpestriz128: 1574 ; CHECK: # BB#0: 1575 ; CHECK-NEXT: movl $7, %eax 1576 ; CHECK-NEXT: movl $7, %edx 1577 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0 1578 ; CHECK-NEXT: sete %al 1579 ; CHECK-NEXT: movzbl %al, %eax 1580 ; CHECK-NEXT: retl 1581 %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1582 ret i32 %res 1583 } 1584 declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1585 1586 1587 define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) { 1588 ; CHECK-LABEL: test_x86_sse42_pcmpestrm128: 1589 ; CHECK: # BB#0: 1590 ; CHECK-NEXT: movl $7, %eax 1591 ; CHECK-NEXT: movl $7, %edx 1592 ; CHECK-NEXT: vpcmpestrm $7, %xmm1, %xmm0 1593 ; CHECK-NEXT: retl 1594 %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1] 1595 ret <16 x i8> %res 1596 } 1597 declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1598 1599 1600 define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) { 1601 ; CHECK-LABEL: test_x86_sse42_pcmpestrm128_load: 1602 ; CHECK: # BB#0: 1603 ; CHECK-NEXT: movl 
{{[0-9]+}}(%esp), %ecx 1604 ; CHECK-NEXT: movl $7, %eax 1605 ; CHECK-NEXT: movl $7, %edx 1606 ; CHECK-NEXT: vpcmpestrm $7, (%ecx), %xmm0 1607 ; CHECK-NEXT: retl 1608 %1 = load <16 x i8>, <16 x i8>* %a2 1609 %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1] 1610 ret <16 x i8> %res 1611 } 1612 1613 1614 define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) { 1615 ; CHECK-LABEL: test_x86_sse42_pcmpistri128: 1616 ; CHECK: # BB#0: 1617 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1618 ; CHECK-NEXT: movl %ecx, %eax 1619 ; CHECK-NEXT: retl 1620 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1621 ret i32 %res 1622 } 1623 declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1624 1625 1626 define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) { 1627 ; CHECK-LABEL: test_x86_sse42_pcmpistri128_load: 1628 ; CHECK: # BB#0: 1629 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1630 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx 1631 ; CHECK-NEXT: vmovdqa (%ecx), %xmm0 1632 ; CHECK-NEXT: vpcmpistri $7, (%eax), %xmm0 1633 ; CHECK-NEXT: movl %ecx, %eax 1634 ; CHECK-NEXT: retl 1635 %1 = load <16 x i8>, <16 x i8>* %a0 1636 %2 = load <16 x i8>, <16 x i8>* %a1 1637 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1] 1638 ret i32 %res 1639 } 1640 1641 1642 define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { 1643 ; CHECK-LABEL: test_x86_sse42_pcmpistria128: 1644 ; CHECK: # BB#0: 1645 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1646 ; CHECK-NEXT: seta %al 1647 ; CHECK-NEXT: movzbl %al, %eax 1648 ; CHECK-NEXT: retl 1649 %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1650 ret i32 %res 1651 } 1652 declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1653 1654 1655 
; ---- llvm.x86.sse42.pcmpistr* intrinsics (immediate operand 7 throughout) ----

; Result is materialised from the carry flag (sbbl/andl sequence).
define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistric128:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone

define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistrio128:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
; CHECK-NEXT: seto %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone

define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistris128:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
; CHECK-NEXT: sets %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone

define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistriz128:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistrm128:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpistrm $7, %xmm1, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; Same intrinsic, but the second vector is loaded through a pointer argument;
; the expected instruction takes it as a memory operand.
define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistrm128_load:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vpcmpistrm $7, (%eax), %xmm0
; CHECK-NEXT: retl
  %1 = load <16 x i8>, <16 x i8>* %a1
  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7)
  ret <16 x i8> %res
}

; ---- llvm.x86.sse.* intrinsics ----

define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_add_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone

; Predicate immediate 7 is expected to print as the "ord" mnemonic.
define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_cmp_ps:
; CHECK: # BB#0:
; CHECK-NEXT: vcmpordps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_cmp_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vcmpordss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

; comi* family: one vcomiss followed by a flag-to-integer sequence.
define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comieq_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comige_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comigt_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comile_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: setbe %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comilt_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comineq_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone

; The constant integer operand (7) is expected to be moved into %eax first.
define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvtsi2ss:
; CHECK: # BB#0:
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvtss2si:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtss2si %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvttss2si:
; CHECK: # BB#0:
; CHECK-NEXT: vcvttss2si %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_div_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone

; Pointer argument is read from the stack, then used as the memory operand.
define void @test_x86_sse_ldmxcsr(i8* %a0) {
; CHECK-LABEL: test_x86_sse_ldmxcsr:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vldmxcsr (%eax)
; CHECK-NEXT: retl
  call void @llvm.x86.sse.ldmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind

define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_max_ps:
; CHECK: # BB#0:
; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_max_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_min_ps:
; CHECK: # BB#0:
; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_min_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vminss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_movmsk_ps:
; CHECK: # BB#0:
; CHECK-NEXT: vmovmskps %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone

define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_mul_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rcp_ps:
; CHECK: # BB#0:
; CHECK-NEXT: vrcpps %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone

define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rcp_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vrcpss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone

define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rsqrt_ps:
; CHECK: # BB#0:
; CHECK-NEXT: vrsqrtps %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone

define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rsqrt_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone

define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_sqrt_ps:
; CHECK: # BB#0:
; CHECK-NEXT: vsqrtps %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone

define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_sqrt_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone

define void @test_x86_sse_stmxcsr(i8* %a0) {
; CHECK-LABEL: test_x86_sse_stmxcsr:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vstmxcsr (%eax)
; CHECK-NEXT: retl
  call void @llvm.x86.sse.stmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind

define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_storeu_ps:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovups %xmm0, (%eax)
; CHECK-NEXT: retl
  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind

define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_sub_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone

; ucomi* family: same shape as the comi* tests above, with vucomiss.
define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomieq_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomige_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomigt_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomile_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: setbe %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomilt_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomineq_ss:
; CHECK: # BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone

; ---- llvm.x86.ssse3.* 128-bit intrinsics ----

define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_ssse3_pabs_b_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpabsb %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone

define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_ssse3_pabs_d_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpabsd %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone

define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_ssse3_pabs_w_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpabsw %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone

define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_ssse3_phadd_d_128:
; CHECK: # BB#0:
; CHECK-NEXT: vphaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phadd_sw_128:
; CHECK: # BB#0:
; CHECK-NEXT: vphaddsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phadd_w_128:
; CHECK: # BB#0:
; CHECK-NEXT: vphaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_ssse3_phsub_d_128:
; CHECK: # BB#0:
; CHECK-NEXT: vphsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phsub_sw_128:
; CHECK: # BB#0:
; CHECK-NEXT: vphsubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phsub_w_128:
; CHECK: # BB#0:
; CHECK-NEXT: vphsubw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; Takes two <16 x i8> inputs and returns <8 x i16>.
define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_pmul_hr_sw_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_ssse3_pshuf_b_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_ssse3_psign_b_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpsignb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_ssse3_psign_d_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpsignd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_psign_w_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpsignw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; ---- llvm.x86.avx.* intrinsics ----

define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_addsub_pd_256:
; CHECK: # BB#0:
; CHECK-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_addsub_ps_256:
; CHECK: # BB#0:
; CHECK-NEXT: vaddsubps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone

; Three-operand intrinsic: %a2 is expected in %ymm2 as the first printed operand.
define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_x86_avx_blendv_pd_256:
; CHECK: # BB#0:
; CHECK-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_x86_avx_blendv_ps_256:
; CHECK: # BB#0:
; CHECK-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_pd_256:
; CHECK: # BB#0:
; CHECK-NEXT: vcmpordpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone

; The declaration of llvm.x86.avx.cmp.ps.256 follows the pseudo-op test below.
define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_ps_256:
; CHECK: # BB#0:
; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
  ret <8 x float> %res
}

; Calls llvm.x86.avx.cmp.ps.256 once per predicate immediate 0 through 31.
; Each result is passed as the second operand of the next call, so the 32
; compares form a dependency chain and are expected in immediate order, each
; printed with its own mnemonic.
define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
; CHECK: # BB#0:
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpunordps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpnleps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpeq_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpngeps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpngtps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpfalseps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpneq_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpgeps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpgtps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmptrueps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpeq_osps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmplt_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmple_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpunord_sps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpneq_usps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpnlt_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpnle_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpord_sps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpeq_usps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpnge_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpngt_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpfalse_osps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpneq_osps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpge_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpgt_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmptrue_usps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0)
  %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1)
  %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2)
  %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3)
  %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4)
  %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5)
  %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6)
  %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7)
  %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8)
  %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9)
  %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10)
  %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11)
  %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12)
  %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13)
  %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14)
  %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15)
  %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16)
  %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17)
  %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18)
  %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19)
  %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20)
  %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21)
  %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22)
  %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23)
  %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24)
  %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25)
  %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26)
  %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27)
  %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28)
  %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29)
  %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30)
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

; 256-bit source, 128-bit result: a vzeroupper is expected before the return.
define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_pd2_ps_256:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtpd2psy %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone

define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_pd2dq_256:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtpd2dqy %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone

define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone

define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_ps2dq_256:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone

define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone

define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
; CHECK-LABEL: test_x86_avx_cvtdq2_ps_256:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone

define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
; CHECK: # BB#0:
; CHECK-NEXT: vcvttpd2dqy %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone

define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
; CHECK: # BB#0:
; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone

define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_dp_ps_256:
; CHECK: # BB#0:
; CHECK-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_hadd_pd_256:
; CHECK: # BB#0:
; CHECK-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_hadd_ps_256:
; CHECK: # BB#0:
; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_hsub_pd_256:
; CHECK: # BB#0:
; CHECK-NEXT: vhsubpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_hsub_ps_256:
; CHECK: # BB#0:
; CHECK-NEXT: vhsubps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone

; Memory-reading intrinsics from here on are declared readonly, not readnone.
define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
; CHECK-LABEL: test_x86_avx_ldu_dq_256:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vlddqu (%eax), %ymm0
; CHECK-NEXT: retl
  %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly

define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_maskload_pd:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmaskmovpd (%eax), %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly

define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_maskload_pd_256:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmaskmovpd (%eax), %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly

define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float>
%a1) { 2630 ; CHECK-LABEL: test_x86_avx_maskload_ps: 2631 ; CHECK: # BB#0: 2632 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2633 ; CHECK-NEXT: vmaskmovps (%eax), %xmm0, %xmm0 2634 ; CHECK-NEXT: retl 2635 %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 2636 ret <4 x float> %res 2637 } 2638 declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly 2639 2640 2641 define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) { 2642 ; CHECK-LABEL: test_x86_avx_maskload_ps_256: 2643 ; CHECK: # BB#0: 2644 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2645 ; CHECK-NEXT: vmaskmovps (%eax), %ymm0, %ymm0 2646 ; CHECK-NEXT: retl 2647 %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 2648 ret <8 x float> %res 2649 } 2650 declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly 2651 2652 2653 define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) { 2654 ; CHECK-LABEL: test_x86_avx_maskstore_pd: 2655 ; CHECK: # BB#0: 2656 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2657 ; CHECK-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax) 2658 ; CHECK-NEXT: retl 2659 call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) 2660 ret void 2661 } 2662 declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind 2663 2664 2665 define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) { 2666 ; CHECK-LABEL: test_x86_avx_maskstore_pd_256: 2667 ; CHECK: # BB#0: 2668 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2669 ; CHECK-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax) 2670 ; CHECK-NEXT: vzeroupper 2671 ; CHECK-NEXT: retl 2672 call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2) 2673 ret void 2674 } 2675 declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind 2676 2677 2678 define void 
@test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) { 2679 ; CHECK-LABEL: test_x86_avx_maskstore_ps: 2680 ; CHECK: # BB#0: 2681 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2682 ; CHECK-NEXT: vmaskmovps %xmm1, %xmm0, (%eax) 2683 ; CHECK-NEXT: retl 2684 call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) 2685 ret void 2686 } 2687 declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind 2688 2689 2690 define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) { 2691 ; CHECK-LABEL: test_x86_avx_maskstore_ps_256: 2692 ; CHECK: # BB#0: 2693 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2694 ; CHECK-NEXT: vmaskmovps %ymm1, %ymm0, (%eax) 2695 ; CHECK-NEXT: vzeroupper 2696 ; CHECK-NEXT: retl 2697 call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2) 2698 ret void 2699 } 2700 declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind 2701 2702 2703 define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) { 2704 ; CHECK-LABEL: test_x86_avx_max_pd_256: 2705 ; CHECK: # BB#0: 2706 ; CHECK-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 2707 ; CHECK-NEXT: retl 2708 %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 2709 ret <4 x double> %res 2710 } 2711 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone 2712 2713 2714 define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) { 2715 ; CHECK-LABEL: test_x86_avx_max_ps_256: 2716 ; CHECK: # BB#0: 2717 ; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 2718 ; CHECK-NEXT: retl 2719 %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 2720 ret <8 x float> %res 2721 } 2722 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone 2723 2724 2725 define <4 x double> 
@test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) { 2726 ; CHECK-LABEL: test_x86_avx_min_pd_256: 2727 ; CHECK: # BB#0: 2728 ; CHECK-NEXT: vminpd %ymm1, %ymm0, %ymm0 2729 ; CHECK-NEXT: retl 2730 %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 2731 ret <4 x double> %res 2732 } 2733 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone 2734 2735 2736 define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) { 2737 ; CHECK-LABEL: test_x86_avx_min_ps_256: 2738 ; CHECK: # BB#0: 2739 ; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0 2740 ; CHECK-NEXT: retl 2741 %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 2742 ret <8 x float> %res 2743 } 2744 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone 2745 2746 2747 define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) { 2748 ; CHECK-LABEL: test_x86_avx_movmsk_pd_256: 2749 ; CHECK: # BB#0: 2750 ; CHECK-NEXT: vmovmskpd %ymm0, %eax 2751 ; CHECK-NEXT: vzeroupper 2752 ; CHECK-NEXT: retl 2753 %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1] 2754 ret i32 %res 2755 } 2756 declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone 2757 2758 2759 define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) { 2760 ; CHECK-LABEL: test_x86_avx_movmsk_ps_256: 2761 ; CHECK: # BB#0: 2762 ; CHECK-NEXT: vmovmskps %ymm0, %eax 2763 ; CHECK-NEXT: vzeroupper 2764 ; CHECK-NEXT: retl 2765 %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1] 2766 ret i32 %res 2767 } 2768 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone 2769 2770 2771 2772 2773 2774 2775 2776 define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) { 2777 ; CHECK-LABEL: test_x86_avx_ptestc_256: 2778 ; CHECK: # BB#0: 2779 ; CHECK-NEXT: vptest %ymm1, %ymm0 2780 ; 
CHECK-NEXT: sbbl %eax, %eax 2781 ; CHECK-NEXT: andl $1, %eax 2782 ; CHECK-NEXT: vzeroupper 2783 ; CHECK-NEXT: retl 2784 %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] 2785 ret i32 %res 2786 } 2787 declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone 2788 2789 2790 define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) { 2791 ; CHECK-LABEL: test_x86_avx_ptestnzc_256: 2792 ; CHECK: # BB#0: 2793 ; CHECK-NEXT: vptest %ymm1, %ymm0 2794 ; CHECK-NEXT: seta %al 2795 ; CHECK-NEXT: movzbl %al, %eax 2796 ; CHECK-NEXT: vzeroupper 2797 ; CHECK-NEXT: retl 2798 %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] 2799 ret i32 %res 2800 } 2801 declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone 2802 2803 2804 define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) { 2805 ; CHECK-LABEL: test_x86_avx_ptestz_256: 2806 ; CHECK: # BB#0: 2807 ; CHECK-NEXT: vptest %ymm1, %ymm0 2808 ; CHECK-NEXT: sete %al 2809 ; CHECK-NEXT: movzbl %al, %eax 2810 ; CHECK-NEXT: vzeroupper 2811 ; CHECK-NEXT: retl 2812 %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] 2813 ret i32 %res 2814 } 2815 declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone 2816 2817 2818 define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) { 2819 ; CHECK-LABEL: test_x86_avx_rcp_ps_256: 2820 ; CHECK: # BB#0: 2821 ; CHECK-NEXT: vrcpps %ymm0, %ymm0 2822 ; CHECK-NEXT: retl 2823 %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] 2824 ret <8 x float> %res 2825 } 2826 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone 2827 2828 2829 define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) { 2830 ; CHECK-LABEL: test_x86_avx_round_pd_256: 2831 ; CHECK: # BB#0: 2832 ; CHECK-NEXT: vroundpd $7, %ymm0, %ymm0 2833 ; CHECK-NEXT: retl 2834 %res = call <4 
x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1] 2835 ret <4 x double> %res 2836 } 2837 declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone 2838 2839 2840 define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) { 2841 ; CHECK-LABEL: test_x86_avx_round_ps_256: 2842 ; CHECK: # BB#0: 2843 ; CHECK-NEXT: vroundps $7, %ymm0, %ymm0 2844 ; CHECK-NEXT: retl 2845 %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1] 2846 ret <8 x float> %res 2847 } 2848 declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone 2849 2850 2851 define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) { 2852 ; CHECK-LABEL: test_x86_avx_rsqrt_ps_256: 2853 ; CHECK: # BB#0: 2854 ; CHECK-NEXT: vrsqrtps %ymm0, %ymm0 2855 ; CHECK-NEXT: retl 2856 %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] 2857 ret <8 x float> %res 2858 } 2859 declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone 2860 2861 2862 define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) { 2863 ; CHECK-LABEL: test_x86_avx_sqrt_pd_256: 2864 ; CHECK: # BB#0: 2865 ; CHECK-NEXT: vsqrtpd %ymm0, %ymm0 2866 ; CHECK-NEXT: retl 2867 %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1] 2868 ret <4 x double> %res 2869 } 2870 declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone 2871 2872 2873 define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) { 2874 ; CHECK-LABEL: test_x86_avx_sqrt_ps_256: 2875 ; CHECK: # BB#0: 2876 ; CHECK-NEXT: vsqrtps %ymm0, %ymm0 2877 ; CHECK-NEXT: retl 2878 %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] 2879 ret <8 x float> %res 2880 } 2881 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone 2882 2883 2884 define void 
@test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { 2885 ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and it's hard to force with no 256-bit integer instructions 2886 ; add operation forces the execution domain. 2887 ; CHECK-LABEL: test_x86_avx_storeu_dq_256: 2888 ; CHECK: # BB#0: 2889 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2890 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 2891 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 2892 ; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1 2893 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 2894 ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2895 ; CHECK-NEXT: vmovups %ymm0, (%eax) 2896 ; CHECK-NEXT: vzeroupper 2897 ; CHECK-NEXT: retl 2898 %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 2899 call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2) 2900 ret void 2901 } 2902 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind 2903 2904 2905 define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) { 2906 ; add operation forces the execution domain. 
2907 ; CHECK-LABEL: test_x86_avx_storeu_pd_256: 2908 ; CHECK: # BB#0: 2909 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2910 ; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1 2911 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 2912 ; CHECK-NEXT: vmovupd %ymm0, (%eax) 2913 ; CHECK-NEXT: vzeroupper 2914 ; CHECK-NEXT: retl 2915 %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0> 2916 call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2) 2917 ret void 2918 } 2919 declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind 2920 2921 2922 define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) { 2923 ; CHECK-LABEL: test_x86_avx_storeu_ps_256: 2924 ; CHECK: # BB#0: 2925 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2926 ; CHECK-NEXT: vmovups %ymm0, (%eax) 2927 ; CHECK-NEXT: vzeroupper 2928 ; CHECK-NEXT: retl 2929 call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1) 2930 ret void 2931 } 2932 declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind 2933 2934 2935 define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) { 2936 ; CHECK-LABEL: test_x86_avx_vbroadcastf128_pd_256: 2937 ; CHECK: # BB#0: 2938 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2939 ; CHECK-NEXT: vbroadcastf128 (%eax), %ymm0 2940 ; CHECK-NEXT: retl 2941 %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] 2942 ret <4 x double> %res 2943 } 2944 declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly 2945 2946 2947 define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) { 2948 ; CHECK-LABEL: test_x86_avx_vbroadcastf128_ps_256: 2949 ; CHECK: # BB#0: 2950 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2951 ; CHECK-NEXT: vbroadcastf128 (%eax), %ymm0 2952 ; CHECK-NEXT: retl 2953 %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1] 2954 ret <8 x float> %res 2955 } 2956 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind 
readonly 2957 2958 2959 define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) { 2960 ; CHECK-LABEL: test_x86_avx_vperm2f128_pd_256: 2961 ; CHECK: # BB#0: 2962 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1] 2963 ; CHECK-NEXT: retl 2964 %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] 2965 ret <4 x double> %res 2966 } 2967 declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 2968 2969 2970 define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) { 2971 ; CHECK-LABEL: test_x86_avx_vperm2f128_ps_256: 2972 ; CHECK: # BB#0: 2973 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1] 2974 ; CHECK-NEXT: retl 2975 %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] 2976 ret <8 x float> %res 2977 } 2978 declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 2979 2980 2981 define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) { 2982 ; CHECK-LABEL: test_x86_avx_vperm2f128_si_256: 2983 ; CHECK: # BB#0: 2984 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1] 2985 ; CHECK-NEXT: retl 2986 %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1] 2987 ret <8 x i32> %res 2988 } 2989 declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone 2990 2991 2992 define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) { 2993 ; CHECK-LABEL: test_x86_avx_vpermil_pd: 2994 ; CHECK: # BB#0: 2995 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 2996 ; CHECK-NEXT: retl 2997 %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1] 2998 ret <2 x double> %res 2999 } 3000 declare <2 x double> 
@llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone 3001 3002 3003 define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) { 3004 ; CHECK-LABEL: test_x86_avx_vpermil_pd_256: 3005 ; CHECK: # BB#0: 3006 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2] 3007 ; CHECK-NEXT: retl 3008 %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1] 3009 ret <4 x double> %res 3010 } 3011 declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone 3012 3013 3014 define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) { 3015 ; CHECK-LABEL: test_x86_avx_vpermil_ps: 3016 ; CHECK: # BB#0: 3017 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0] 3018 ; CHECK-NEXT: retl 3019 %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] 3020 ret <4 x float> %res 3021 } 3022 declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone 3023 3024 3025 define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) { 3026 ; CHECK-LABEL: test_x86_avx_vpermil_ps_256: 3027 ; CHECK: # BB#0: 3028 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4] 3029 ; CHECK-NEXT: retl 3030 %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1] 3031 ret <8 x float> %res 3032 } 3033 declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone 3034 3035 3036 define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) { 3037 ; CHECK-LABEL: test_x86_avx_vpermilvar_pd: 3038 ; CHECK: # BB#0: 3039 ; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 3040 ; CHECK-NEXT: retl 3041 %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1] 3042 ret <2 x double> %res 3043 } 3044 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone 3045 3046 3047 define <4 x double> 
@test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) { 3048 ; CHECK-LABEL: test_x86_avx_vpermilvar_pd_256: 3049 ; CHECK: # BB#0: 3050 ; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 3051 ; CHECK-NEXT: retl 3052 %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1] 3053 ret <4 x double> %res 3054 } 3055 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone 3056 3057 3058 define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) { 3059 ; CHECK-LABEL: test_x86_avx_vpermilvar_ps: 3060 ; CHECK: # BB#0: 3061 ; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0 3062 ; CHECK-NEXT: retl 3063 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1] 3064 ret <4 x float> %res 3065 } 3066 define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) { 3067 ; CHECK-LABEL: test_x86_avx_vpermilvar_ps_load: 3068 ; CHECK: # BB#0: 3069 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 3070 ; CHECK-NEXT: vpermilps (%eax), %xmm0, %xmm0 3071 ; CHECK-NEXT: retl 3072 %a2 = load <4 x i32>, <4 x i32>* %a1 3073 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1] 3074 ret <4 x float> %res 3075 } 3076 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone 3077 3078 3079 define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) { 3080 ; CHECK-LABEL: test_x86_avx_vpermilvar_ps_256: 3081 ; CHECK: # BB#0: 3082 ; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0 3083 ; CHECK-NEXT: retl 3084 %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1] 3085 ret <8 x float> %res 3086 } 3087 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone 3088 3089 3090 define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 
x double> %a1) { 3091 ; CHECK-LABEL: test_x86_avx_vtestc_pd: 3092 ; CHECK: # BB#0: 3093 ; CHECK-NEXT: vtestpd %xmm1, %xmm0 3094 ; CHECK-NEXT: sbbl %eax, %eax 3095 ; CHECK-NEXT: andl $1, %eax 3096 ; CHECK-NEXT: retl 3097 %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 3098 ret i32 %res 3099 } 3100 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone 3101 3102 3103 define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) { 3104 ; CHECK-LABEL: test_x86_avx_vtestc_pd_256: 3105 ; CHECK: # BB#0: 3106 ; CHECK-NEXT: vtestpd %ymm1, %ymm0 3107 ; CHECK-NEXT: sbbl %eax, %eax 3108 ; CHECK-NEXT: andl $1, %eax 3109 ; CHECK-NEXT: vzeroupper 3110 ; CHECK-NEXT: retl 3111 %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] 3112 ret i32 %res 3113 } 3114 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone 3115 3116 3117 define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) { 3118 ; CHECK-LABEL: test_x86_avx_vtestc_ps: 3119 ; CHECK: # BB#0: 3120 ; CHECK-NEXT: vtestps %xmm1, %xmm0 3121 ; CHECK-NEXT: sbbl %eax, %eax 3122 ; CHECK-NEXT: andl $1, %eax 3123 ; CHECK-NEXT: retl 3124 %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 3125 ret i32 %res 3126 } 3127 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone 3128 3129 3130 define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) { 3131 ; CHECK-LABEL: test_x86_avx_vtestc_ps_256: 3132 ; CHECK: # BB#0: 3133 ; CHECK-NEXT: vtestps %ymm1, %ymm0 3134 ; CHECK-NEXT: sbbl %eax, %eax 3135 ; CHECK-NEXT: andl $1, %eax 3136 ; CHECK-NEXT: vzeroupper 3137 ; CHECK-NEXT: retl 3138 %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] 3139 ret i32 %res 3140 } 3141 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone 
3142 3143 3144 define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) { 3145 ; CHECK-LABEL: test_x86_avx_vtestnzc_pd: 3146 ; CHECK: # BB#0: 3147 ; CHECK-NEXT: vtestpd %xmm1, %xmm0 3148 ; CHECK-NEXT: seta %al 3149 ; CHECK-NEXT: movzbl %al, %eax 3150 ; CHECK-NEXT: retl 3151 %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 3152 ret i32 %res 3153 } 3154 declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone 3155 3156 3157 define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) { 3158 ; CHECK-LABEL: test_x86_avx_vtestnzc_pd_256: 3159 ; CHECK: # BB#0: 3160 ; CHECK-NEXT: vtestpd %ymm1, %ymm0 3161 ; CHECK-NEXT: seta %al 3162 ; CHECK-NEXT: movzbl %al, %eax 3163 ; CHECK-NEXT: vzeroupper 3164 ; CHECK-NEXT: retl 3165 %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] 3166 ret i32 %res 3167 } 3168 declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone 3169 3170 3171 define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) { 3172 ; CHECK-LABEL: test_x86_avx_vtestnzc_ps: 3173 ; CHECK: # BB#0: 3174 ; CHECK-NEXT: vtestps %xmm1, %xmm0 3175 ; CHECK-NEXT: seta %al 3176 ; CHECK-NEXT: movzbl %al, %eax 3177 ; CHECK-NEXT: retl 3178 %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 3179 ret i32 %res 3180 } 3181 declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone 3182 3183 3184 define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) { 3185 ; CHECK-LABEL: test_x86_avx_vtestnzc_ps_256: 3186 ; CHECK: # BB#0: 3187 ; CHECK-NEXT: vtestps %ymm1, %ymm0 3188 ; CHECK-NEXT: seta %al 3189 ; CHECK-NEXT: movzbl %al, %eax 3190 ; CHECK-NEXT: vzeroupper 3191 ; CHECK-NEXT: retl 3192 %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] 3193 ret i32 %res 3194 } 3195 
declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone 3196 3197 3198 define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) { 3199 ; CHECK-LABEL: test_x86_avx_vtestz_pd: 3200 ; CHECK: # BB#0: 3201 ; CHECK-NEXT: vtestpd %xmm1, %xmm0 3202 ; CHECK-NEXT: sete %al 3203 ; CHECK-NEXT: movzbl %al, %eax 3204 ; CHECK-NEXT: retl 3205 %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 3206 ret i32 %res 3207 } 3208 declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone 3209 3210 3211 define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) { 3212 ; CHECK-LABEL: test_x86_avx_vtestz_pd_256: 3213 ; CHECK: # BB#0: 3214 ; CHECK-NEXT: vtestpd %ymm1, %ymm0 3215 ; CHECK-NEXT: sete %al 3216 ; CHECK-NEXT: movzbl %al, %eax 3217 ; CHECK-NEXT: vzeroupper 3218 ; CHECK-NEXT: retl 3219 %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] 3220 ret i32 %res 3221 } 3222 declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone 3223 3224 3225 define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) { 3226 ; CHECK-LABEL: test_x86_avx_vtestz_ps: 3227 ; CHECK: # BB#0: 3228 ; CHECK-NEXT: vtestps %xmm1, %xmm0 3229 ; CHECK-NEXT: sete %al 3230 ; CHECK-NEXT: movzbl %al, %eax 3231 ; CHECK-NEXT: retl 3232 %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 3233 ret i32 %res 3234 } 3235 declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone 3236 3237 3238 define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) { 3239 ; CHECK-LABEL: test_x86_avx_vtestz_ps_256: 3240 ; CHECK: # BB#0: 3241 ; CHECK-NEXT: vtestps %ymm1, %ymm0 3242 ; CHECK-NEXT: sete %al 3243 ; CHECK-NEXT: movzbl %al, %eax 3244 ; CHECK-NEXT: vzeroupper 3245 ; CHECK-NEXT: retl 3246 %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x 
float> %a1) ; <i32> [#uses=1] 3247 ret i32 %res 3248 } 3249 declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone 3250 3251 3252 define void @test_x86_avx_vzeroall() { 3253 ; CHECK-LABEL: test_x86_avx_vzeroall: 3254 ; CHECK: # BB#0: 3255 ; CHECK-NEXT: vzeroall 3256 ; CHECK-NEXT: vzeroupper 3257 ; CHECK-NEXT: retl 3258 call void @llvm.x86.avx.vzeroall() 3259 ret void 3260 } 3261 declare void @llvm.x86.avx.vzeroall() nounwind 3262 3263 3264 define void @test_x86_avx_vzeroupper() { 3265 ; CHECK-LABEL: test_x86_avx_vzeroupper: 3266 ; CHECK: # BB#0: 3267 ; CHECK-NEXT: vzeroupper 3268 ; CHECK-NEXT: vzeroupper 3269 ; CHECK-NEXT: retl 3270 call void @llvm.x86.avx.vzeroupper() 3271 ret void 3272 } 3273 declare void @llvm.x86.avx.vzeroupper() nounwind 3274 3275 ; Make sure instructions that have no AVX equivalents, but are associated with SSEX feature flags, still work 3276 3277 define void @monitor(i8* %P, i32 %E, i32 %H) nounwind { 3278 ; CHECK-LABEL: monitor: 3279 ; CHECK: # BB#0: 3280 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx 3281 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx 3282 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 3283 ; CHECK-NEXT: leal (%eax), %eax 3284 ; CHECK-NEXT: monitor 3285 ; CHECK-NEXT: retl 3286 tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H) 3287 ret void 3288 } 3289 declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind 3290 3291 define void @mwait(i32 %E, i32 %H) nounwind { 3292 ; CHECK-LABEL: mwait: 3293 ; CHECK: # BB#0: 3294 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx 3295 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 3296 ; CHECK-NEXT: mwait 3297 ; CHECK-NEXT: retl 3298 tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H) 3299 ret void 3300 } 3301 declare void @llvm.x86.sse3.mwait(i32, i32) nounwind 3302 3303 define void @sfence() nounwind { 3304 ; CHECK-LABEL: sfence: 3305 ; CHECK: # BB#0: 3306 ; CHECK-NEXT: sfence 3307 ; CHECK-NEXT: retl 3308 tail call void @llvm.x86.sse.sfence() 3309 ret void 3310 } 3311 declare 
void @llvm.x86.sse.sfence() nounwind 3312 3313 define void @lfence() nounwind { 3314 ; CHECK-LABEL: lfence: 3315 ; CHECK: # BB#0: 3316 ; CHECK-NEXT: lfence 3317 ; CHECK-NEXT: retl 3318 tail call void @llvm.x86.sse2.lfence() 3319 ret void 3320 } 3321 declare void @llvm.x86.sse2.lfence() nounwind 3322 3323 define void @mfence() nounwind { 3324 ; CHECK-LABEL: mfence: 3325 ; CHECK: # BB#0: 3326 ; CHECK-NEXT: mfence 3327 ; CHECK-NEXT: retl 3328 tail call void @llvm.x86.sse2.mfence() 3329 ret void 3330 } 3331 declare void @llvm.x86.sse2.mfence() nounwind 3332 3333 define void @clflush(i8* %p) nounwind { 3334 ; CHECK-LABEL: clflush: 3335 ; CHECK: # BB#0: 3336 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 3337 ; CHECK-NEXT: clflush (%eax) 3338 ; CHECK-NEXT: retl 3339 tail call void @llvm.x86.sse2.clflush(i8* %p) 3340 ret void 3341 } 3342 declare void @llvm.x86.sse2.clflush(i8*) nounwind 3343 3344 define i32 @crc32_32_8(i32 %a, i8 %b) nounwind { 3345 ; CHECK-LABEL: crc32_32_8: 3346 ; CHECK: # BB#0: 3347 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 3348 ; CHECK-NEXT: crc32b {{[0-9]+}}(%esp), %eax 3349 ; CHECK-NEXT: retl 3350 %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) 3351 ret i32 %tmp 3352 } 3353 declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind 3354 3355 define i32 @crc32_32_16(i32 %a, i16 %b) nounwind { 3356 ; CHECK-LABEL: crc32_32_16: 3357 ; CHECK: # BB#0: 3358 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 3359 ; CHECK-NEXT: crc32w {{[0-9]+}}(%esp), %eax 3360 ; CHECK-NEXT: retl 3361 %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b) 3362 ret i32 %tmp 3363 } 3364 declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind 3365 3366 define i32 @crc32_32_32(i32 %a, i32 %b) nounwind { 3367 ; CHECK-LABEL: crc32_32_32: 3368 ; CHECK: # BB#0: 3369 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 3370 ; CHECK-NEXT: crc32l {{[0-9]+}}(%esp), %eax 3371 ; CHECK-NEXT: retl 3372 %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b) 3373 ret i32 %tmp 3374 } 3375 declare 
i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind 3376 3377 define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind { 3378 ; CHECK-LABEL: movnt_dq: 3379 ; CHECK: # BB#0: 3380 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 3381 ; CHECK-NEXT: vpaddq LCPI282_0, %xmm0, %xmm0 3382 ; CHECK-NEXT: vmovntdq %ymm0, (%eax) 3383 ; CHECK-NEXT: vzeroupper 3384 ; CHECK-NEXT: retl 3385 %a2 = add <2 x i64> %a1, <i64 1, i64 1> 3386 %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 3387 tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind 3388 ret void 3389 } 3390 declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind 3391 3392 define void @movnt_ps(i8* %p, <8 x float> %a) nounwind { 3393 ; CHECK-LABEL: movnt_ps: 3394 ; CHECK: # BB#0: 3395 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 3396 ; CHECK-NEXT: vmovntps %ymm0, (%eax) 3397 ; CHECK-NEXT: vzeroupper 3398 ; CHECK-NEXT: retl 3399 tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind 3400 ret void 3401 } 3402 declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind 3403 3404 define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind { 3405 ; add operation forces the execution domain. 
3406 ; CHECK-LABEL: movnt_pd: 3407 ; CHECK: # BB#0: 3408 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 3409 ; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1 3410 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 3411 ; CHECK-NEXT: vmovntpd %ymm0, (%eax) 3412 ; CHECK-NEXT: vzeroupper 3413 ; CHECK-NEXT: retl 3414 %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0> 3415 tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind 3416 ret void 3417 } 3418 declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind 3419 3420 3421 ; Check for pclmulqdq 3422 define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) { 3423 ; CHECK-LABEL: test_x86_pclmulqdq: 3424 ; CHECK: # BB#0: 3425 ; CHECK-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 3426 ; CHECK-NEXT: retl 3427 %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1] 3428 ret <2 x i64> %res 3429 } 3430 declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone 3431