; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx,aes,pclmul | FileCheck %s

; Each test calls a single x86 intrinsic and pins the exact instruction
; sequence expected from llc. The expected output is 32-bit code (every
; function returns with retl), so the command line names a 32-bit triple
; directly instead of naming a 64-bit triple and overriding its architecture
; with a second option.

; AES-NI intrinsics: with AVX enabled the expected instructions are the
; v-prefixed forms.

define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesdec:
; CHECK: ## BB#0:
; CHECK-NEXT: vaesdec %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesdeclast:
; CHECK: ## BB#0:
; CHECK-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesenc:
; CHECK: ## BB#0:
; CHECK-NEXT: vaesenc %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesenclast:
; CHECK: ## BB#0:
; CHECK-NEXT: vaesenclast %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_aesni_aesimc:
; CHECK: ## BB#0:
; CHECK-NEXT: vaesimc %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone


; The i8 operand 7 is expected to appear as the $7 immediate.
define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_aesni_aeskeygenassist:
; CHECK: ## BB#0:
; CHECK-NEXT: vaeskeygenassist $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone


; SSE2 intrinsics compiled with AVX enabled: the expected instructions are
; again the v-prefixed forms.

define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_add_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


; Predicate operand 7 is expected to print as the "ord" compare mnemonic.
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone


define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone


; comi* tests: one vcomisd followed by materialising the flag result in %eax.

define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comieq_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comige_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comigt_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comile_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: setbe %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone


; Unlike its siblings, the "lt" variant is expected to use sbbl/andl rather
; than a setcc/movzbl pair.
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comilt_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comineq_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone


; Conversion intrinsics.

define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2dq:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone


define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsd2si:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtsd2si %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone


; The constant i32 7 is expected to be moved into %eax before the convert.
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
; CHECK: ## BB#0:
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtss2sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttps2dq:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone


; Each test calls a single SSE2 intrinsic and pins the expected instruction
; sequence; with AVX enabled the expected instructions are the v-prefixed
; forms and every function returns with retl.

define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttsd2si:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttsd2si %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone


; Double-precision arithmetic, min/max and sign-mask extraction.

define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_div_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone



define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vminpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_movmsk_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovmskpd %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone




define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_mul_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


; Integer pack intrinsics.

define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_packssdw_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone


define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packsswb_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packuswb_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone


; Integer add, average, multiply-add and min/max intrinsics.

define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_b:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_w:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_b:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_w:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_b:
; CHECK: ## BB#0:
; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_w:
; CHECK: ## BB#0:
; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmadd_wd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxs_w:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxu_b:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmins_w:
; CHECK: ## BB#0:
; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pminu_b:
; CHECK: ## BB#0:
; CHECK-NEXT: vpminub %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone


define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovmskb %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone


; Integer multiply and sum-of-absolute-differences intrinsics.

define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulh_w:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulhu_w:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulu_dq:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psad_bw:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


; Left shifts: the psll.* tests take the shift count in a vector register,
; the pslli.* tests take the constant i32 7, expected as the $7 immediate.

define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_w:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpslld $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsllq $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
; Each test calls a single SSE2/SSE3 intrinsic and pins the expected
; instruction sequence; with AVX enabled the expected instructions are the
; v-prefixed forms and every function returns with retl.

define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_w:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone


; Right shifts: psra.*/psrl.* take the shift count in a vector register,
; psrai.*/psrli.* take the constant i32 7, expected as the $7 immediate.

define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_w:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrad $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_w:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsraw $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_w:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrld $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrlq $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_w:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrlw $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone


; Integer subtract intrinsics.

define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_b:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_w:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_b:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_w:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone


; Square-root intrinsics.

define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtpd %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone


; Store intrinsics: the pointer argument is expected to be loaded from the
; stack into %eax before the store.

define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_storel_dq:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovlps %xmm0, (%eax)
; CHECK-NEXT: retl
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind


define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; add operation forces the execution domain.
  ; NOTE(review): the constant-pool label LCPI77_0 expected below appears to
  ; embed this function's position in the file (it is the 78th definition),
  ; so adding or removing tests above it would presumably require the
  ; assertions to be regenerated - confirm before reordering.
; CHECK-LABEL: test_x86_sse2_storeu_dq:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vpaddb LCPI77_0, %xmm0, %xmm0
; CHECK-NEXT: vmovdqu %xmm0, (%eax)
; CHECK-NEXT: retl
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind


define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; fadd operation forces the execution domain.
; CHECK-LABEL: test_x86_sse2_storeu_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovupd %xmm0, (%eax)
; CHECK-NEXT: retl
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind


define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_sub_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


; ucomi* tests: one vucomisd followed by materialising the flag result in
; %eax.

define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomige_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomile_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: setbe %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone


; Unlike its siblings, the "lt" variant is expected to use sbbl/andl rather
; than a setcc/movzbl pair.
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone


; SSE3 intrinsics.

define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse3_addsub_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse3_addsub_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddsubps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse3_hadd_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone


; SSE3 horizontal add/subtract intrinsics. With AVX enabled each call is
; expected to select the v-prefixed three-operand instruction.
define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse3_hadd_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse3_hsub_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse3_hsub_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone


; The pointer argument is read from the stack into %eax and then used as the
; address of the unaligned load.
define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
; CHECK-LABEL: test_x86_sse3_ldu_dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vlddqu (%eax), %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly


; SSE4.1 intrinsics. The variable blends take a third vector operand, which
; shows up as %xmm2 in the expected instruction.
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_x86_sse41_blendvpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_x86_sse41_blendvps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone


; The i8 7 argument is expected to be emitted unchanged as the $7 immediate.
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_dppd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone


define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_dpps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone


; Here the immediate is not matched literally: the expected line matches the
; decoded shuffle comment, in which lanes 0-2 are zeroed and lane 3 is kept
; from xmm0.
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_insertps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3]
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone



define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse41_mpsadbw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone


define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_packusdw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone


define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse41_pblendvb:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_phminposuw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vphminposuw %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone


; Packed integer max/min intrinsics, one test per element type and signedness.
define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxsb:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxsd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxud:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxuw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse41_pminsb:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pminsd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pminud:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pminuw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone


; Zero-extension intrinsics. The expected lines match the printed shuffle
; comment, which lists each source element followed by its zero padding.
define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxdq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pmuldq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone


; The three ptest intrinsics all emit the same vptest; they differ only in how
; the i32 result is produced afterwards (sbbl+andl, seta or sete).
define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse41_ptestc:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vptest %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone


define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse41_ptestnzc:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vptest %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone


define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse41_ptestz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vptest %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone


; Rounding intrinsics: the i32 7 control argument is expected as the $7
; immediate. The scalar (sd/ss) forms take two vector operands.
define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse41_round_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vroundpd $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse41_round_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vroundps $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone


define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_round_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_round_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone


; SSE4.2 explicit-length string compares. The two i32 7 length arguments are
; expected in %eax and %edx before the instruction, and the index result is
; copied out of %ecx into the return register.
define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestri128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


; Same intrinsic with both vectors loaded from memory: one is expected to be
; loaded with vmovdqa and the other folded into the instruction as a memory
; operand. The intrinsic is already declared above, so no declare follows.
define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestri128_load:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmovdqa (%eax), %xmm0
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, (%ecx), %xmm0
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    retl
  %1 = load <16 x i8>, <16 x i8>* %a0
  %2 = load <16 x i8>, <16 x i8>* %a2
  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}


; Flag-returning variants (a/c/o/s/z): each still emits vpcmpestri and then
; turns the relevant flag into an i32 with seta, sbbl+andl, seto, sets or sete.
define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestria128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestric128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestrio128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    seto %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestris128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    sets %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestriz128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


; Mask-returning form: the <16 x i8> result is returned directly, so no copy
; or setcc follows the instruction in the expected output.
define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestrm128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestrm $7, %xmm1, %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


; Memory-operand variant: the loaded vector is expected to be folded into the
; instruction as (%ecx). The intrinsic is already declared above.
define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestrm128_load:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestrm $7, (%ecx), %xmm0
; CHECK-NEXT:    retl
  %1 = load <16 x i8>, <16 x i8>* %a2
  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}

1549 define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) { 1550 ; CHECK-LABEL: test_x86_sse42_pcmpistri128: 1551 ; CHECK: ## BB#0: 1552 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1553 ; CHECK-NEXT: movl %ecx, %eax 1554 ; CHECK-NEXT: retl 1555 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1556 ret i32 %res 1557 } 1558 declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1559 1560 1561 define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) { 1562 ; CHECK-LABEL: test_x86_sse42_pcmpistri128_load: 1563 ; CHECK: ## BB#0: 1564 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1565 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx 1566 ; CHECK-NEXT: vmovdqa (%ecx), %xmm0 1567 ; CHECK-NEXT: vpcmpistri $7, (%eax), %xmm0 1568 ; CHECK-NEXT: movl %ecx, %eax 1569 ; CHECK-NEXT: retl 1570 %1 = load <16 x i8>, <16 x i8>* %a0 1571 %2 = load <16 x i8>, <16 x i8>* %a1 1572 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1] 1573 ret i32 %res 1574 } 1575 1576 1577 define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { 1578 ; CHECK-LABEL: test_x86_sse42_pcmpistria128: 1579 ; CHECK: ## BB#0: 1580 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1581 ; CHECK-NEXT: seta %al 1582 ; CHECK-NEXT: movzbl %al, %eax 1583 ; CHECK-NEXT: retl 1584 %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1585 ret i32 %res 1586 } 1587 declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1588 1589 1590 define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) { 1591 ; CHECK-LABEL: test_x86_sse42_pcmpistric128: 1592 ; CHECK: ## BB#0: 1593 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1594 ; CHECK-NEXT: sbbl %eax, %eax 1595 ; CHECK-NEXT: andl $1, %eax 1596 ; CHECK-NEXT: retl 1597 %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) 
; <i32> [#uses=1] 1598 ret i32 %res 1599 } 1600 declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1601 1602 1603 define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) { 1604 ; CHECK-LABEL: test_x86_sse42_pcmpistrio128: 1605 ; CHECK: ## BB#0: 1606 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1607 ; CHECK-NEXT: seto %al 1608 ; CHECK-NEXT: movzbl %al, %eax 1609 ; CHECK-NEXT: retl 1610 %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1611 ret i32 %res 1612 } 1613 declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1614 1615 1616 define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) { 1617 ; CHECK-LABEL: test_x86_sse42_pcmpistris128: 1618 ; CHECK: ## BB#0: 1619 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1620 ; CHECK-NEXT: sets %al 1621 ; CHECK-NEXT: movzbl %al, %eax 1622 ; CHECK-NEXT: retl 1623 %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1624 ret i32 %res 1625 } 1626 declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1627 1628 1629 define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) { 1630 ; CHECK-LABEL: test_x86_sse42_pcmpistriz128: 1631 ; CHECK: ## BB#0: 1632 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1633 ; CHECK-NEXT: sete %al 1634 ; CHECK-NEXT: movzbl %al, %eax 1635 ; CHECK-NEXT: retl 1636 %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1637 ret i32 %res 1638 } 1639 declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1640 1641 1642 define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { 1643 ; CHECK-LABEL: test_x86_sse42_pcmpistrm128: 1644 ; CHECK: ## BB#0: 1645 ; CHECK-NEXT: vpcmpistrm $7, %xmm1, %xmm0 1646 ; CHECK-NEXT: retl 1647 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x 
i8> %a1, i8 7) ; <<16 x i8>> [#uses=1] 1648 ret <16 x i8> %res 1649 } 1650 declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1651 1652 1653 define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) { 1654 ; CHECK-LABEL: test_x86_sse42_pcmpistrm128_load: 1655 ; CHECK: ## BB#0: 1656 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1657 ; CHECK-NEXT: vpcmpistrm $7, (%eax), %xmm0 1658 ; CHECK-NEXT: retl 1659 %1 = load <16 x i8>, <16 x i8>* %a1 1660 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1] 1661 ret <16 x i8> %res 1662 } 1663 1664 1665 define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { 1666 ; CHECK-LABEL: test_x86_sse_add_ss: 1667 ; CHECK: ## BB#0: 1668 ; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0 1669 ; CHECK-NEXT: retl 1670 %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1671 ret <4 x float> %res 1672 } 1673 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone 1674 1675 1676 define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) { 1677 ; CHECK-LABEL: test_x86_sse_cmp_ps: 1678 ; CHECK: ## BB#0: 1679 ; CHECK-NEXT: vcmpordps %xmm1, %xmm0, %xmm0 1680 ; CHECK-NEXT: retl 1681 %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] 1682 ret <4 x float> %res 1683 } 1684 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone 1685 1686 1687 define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) { 1688 ; CHECK-LABEL: test_x86_sse_cmp_ss: 1689 ; CHECK: ## BB#0: 1690 ; CHECK-NEXT: vcmpordss %xmm1, %xmm0, %xmm0 1691 ; CHECK-NEXT: retl 1692 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] 1693 ret <4 x float> %res 1694 } 1695 declare <4 x float> 
@llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone


; comi*.ss intrinsics returning i32: each expects vcomiss followed by one setcc
; (sete / setae / seta / setbe below) and a zero-extension into %eax.
define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comieq_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comige_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comigt_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comile_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: setbe %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x
float>) nounwind readnone


; The less-than form is expected to build its result with sbbl/andl instead of
; a setcc/movzbl pair.
define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comilt_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comineq_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone


; Scalar int <-> float conversions. The constant 7 passed to cvtsi2ss is
; expected to be placed in %eax before the conversion instruction.
define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvtsi2ss:
; CHECK: ## BB#0:
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone


define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvtss2si:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtss2si %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone


define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvttss2si:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttss2si %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_div_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone


; The pointer argument is read from the stack into %eax and used as the memory
; operand of vldmxcsr.
define void @test_x86_sse_ldmxcsr(i8* %a0) {
; CHECK-LABEL: test_x86_sse_ldmxcsr:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vldmxcsr (%eax)
; CHECK-NEXT: retl
  call void @llvm.x86.sse.ldmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind



define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_max_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_max_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x
float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_min_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_min_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vminss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_movmsk_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovmskps %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone



define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_mul_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone


; Unary rcp / rsqrt / sqrt intrinsics. The packed (ps) forms expect a
; two-operand instruction; the scalar (ss) forms expect %xmm0 repeated as both
; sources.
define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rcp_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vrcpps %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rcp_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vrcpss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rsqrt_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vrsqrtps %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rsqrt_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_sqrt_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtps %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_sqrt_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
1961 %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] 1962 ret <4 x float> %res 1963 } 1964 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone 1965 1966 1967 define void @test_x86_sse_stmxcsr(i8* %a0) { 1968 ; CHECK-LABEL: test_x86_sse_stmxcsr: 1969 ; CHECK: ## BB#0: 1970 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1971 ; CHECK-NEXT: vstmxcsr (%eax) 1972 ; CHECK-NEXT: retl 1973 call void @llvm.x86.sse.stmxcsr(i8* %a0) 1974 ret void 1975 } 1976 declare void @llvm.x86.sse.stmxcsr(i8*) nounwind 1977 1978 1979 define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) { 1980 ; CHECK-LABEL: test_x86_sse_storeu_ps: 1981 ; CHECK: ## BB#0: 1982 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1983 ; CHECK-NEXT: vmovups %xmm0, (%eax) 1984 ; CHECK-NEXT: retl 1985 call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1) 1986 ret void 1987 } 1988 declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind 1989 1990 1991 define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) { 1992 ; CHECK-LABEL: test_x86_sse_sub_ss: 1993 ; CHECK: ## BB#0: 1994 ; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0 1995 ; CHECK-NEXT: retl 1996 %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1997 ret <4 x float> %res 1998 } 1999 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone 2000 2001 2002 define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) { 2003 ; CHECK-LABEL: test_x86_sse_ucomieq_ss: 2004 ; CHECK: ## BB#0: 2005 ; CHECK-NEXT: vucomiss %xmm1, %xmm0 2006 ; CHECK-NEXT: sete %al 2007 ; CHECK-NEXT: movzbl %al, %eax 2008 ; CHECK-NEXT: retl 2009 %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 2010 ret i32 %res 2011 } 2012 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone 2013 2014 2015 define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x 
float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomige_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomigt_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomile_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: setbe %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone


; As with comilt, the less-than form expects sbbl/andl rather than a setcc.
define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomilt_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomineq_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone


; SSSE3 128-bit integer intrinsics; each expects exactly one v-prefixed
; instruction before the return.
define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_ssse3_pabs_b_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpabsb %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone


define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_ssse3_pabs_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpabsd %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone


define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_ssse3_pabs_w_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpabsw %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone


define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_ssse3_phadd_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vphaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x
i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone


; Remaining SSSE3 horizontal add / subtract intrinsics (saturating and
; non-saturating word forms, dword form).
define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phadd_sw_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vphaddsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phadd_w_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vphaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_ssse3_phsub_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vphsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phsub_sw_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vphsubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


define
<8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phsub_w_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vphsubw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone


; Takes two <16 x i8> operands and returns <8 x i16>.
define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_pmul_hr_sw_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_ssse3_pshuf_b_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone


define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_ssse3_psign_b_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsignb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_ssse3_psign_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsignd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_psign_w_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsignw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone


; 256-bit AVX intrinsics start here; the expected instructions operate on
; %ymm registers.
define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_addsub_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_addsub_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddsubps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone


; Three-operand blends: the third argument (%a2) is expected in %ymm2 as the
; first printed operand.
define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_x86_avx_blendv_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_x86_avx_blendv_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone


; Compare immediate 7 is expected to print as the "ord" predicate mnemonic.
define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpordpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone


define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
  ret <8 x float> %res
}

; Walks every compare immediate from 0 through 31. Each call takes %a0 as the
; first operand and the previous call's result as the second, so the calls form
; one dependency chain. The expected output is one vcmp<pred>ps mnemonic per
; immediate, in immediate order; only the last one writes %ymm0.
define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpunordps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpnleps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpeq_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpngeps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpngtps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpfalseps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpneq_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpgeps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpgtps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmptrueps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpeq_osps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmplt_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmple_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpunord_sps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpneq_usps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpnlt_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpnle_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpord_sps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpeq_usps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpnge_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpngt_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpfalse_osps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpneq_osps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpge_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpgt_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmptrue_usps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0)
  %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1)
  %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2)
  %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3)
  %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4)
  %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5)
  %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6)
  %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7)
  %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8)
  %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9)
  %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10)
  %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11)
  %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12)
  %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13)
  %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14)
  %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15)
  %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16)
  %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17)
  %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18)
  %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19)
  %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20)
  %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21)
  %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22)
  %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23)
  %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24)
  %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25)
  %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26)
  %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27)
  %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28)
  %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29)
  %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30)
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31)
  ret <8 x float>
%res
}
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


; 256-bit conversions. The tests that take a <4 x double> argument and return a
; 128-bit vector expect a vzeroupper before the return.
define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_pd2_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtpd2psy %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone


define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_pd2dq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtpd2dqy %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone


define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone


define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_ps2dq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone


define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
; CHECK-LABEL: test_x86_avx_cvtdq2_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone


define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttpd2dqy %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone


define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone


; The i8 7 argument is expected to appear as the $7 immediate operand.
define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_dp_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


; 256-bit horizontal add / subtract, double and single precision.
define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_hadd_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_hadd_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_hsub_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vhsubpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_hsub_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vhsubps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone


; Unaligned 256-bit integer load. The pointer argument is first read from the
; stack into %eax, then used as the memory operand of vlddqu.
define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
; CHECK-LABEL: test_x86_avx_ldu_dq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vlddqu (%eax), %ymm0
; CHECK-NEXT:    retl
  %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly


; Masked loads. The mask arrives in xmm0/ymm0 and the loaded value is expected
; in the same register; the address comes from the stack via %eax.
define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x i64> %mask) {
; CHECK-LABEL: test_x86_avx_maskload_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovpd (%eax), %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly


define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x i64> %mask) {
; CHECK-LABEL: test_x86_avx_maskload_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovpd (%eax), %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly


define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x i32> %mask) {
; CHECK-LABEL: test_x86_avx_maskload_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovps (%eax), %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly


define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x i32> %mask) {
; CHECK-LABEL: test_x86_avx_maskload_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovps (%eax), %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly


; Masked stores. The mask is in xmm0/ymm0 and the data in xmm1/ymm1. The
; 256-bit variant additionally expects a vzeroupper before the return.
define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) {
; CHECK-LABEL: test_x86_avx_maskstore_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovpd %xmm1, %xmm0, (%eax)
; CHECK-NEXT:    retl
  call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind


define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) {
; CHECK-LABEL: test_x86_avx_maskstore_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovpd %ymm1, %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind


define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) {
; CHECK-LABEL: test_x86_avx_maskstore_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovps %xmm1, %xmm0, (%eax)
; CHECK-NEXT:    retl
  call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind


define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) {
; CHECK-LABEL: test_x86_avx_maskstore_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind


; 256-bit packed max/min, double and single precision.
define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_max_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_max_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_min_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_min_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vminps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone


; Sign-mask extraction from a ymm register into %eax; a vzeroupper is expected
; before the return.
define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_movmsk_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovmskpd %ymm0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone


define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_movmsk_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovmskps %ymm0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone


; 256-bit vptest. The three variants differ only in how the i32 result is
; materialised from the flags: sbbl/andl (ptestc), seta (ptestnzc), sete (ptestz).
define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestc_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone


define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestnzc_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone


define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone


; Unary 256-bit math: reciprocal, rounding (i32 7 -> $7 immediate),
; reciprocal square root and square root.
define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_rcp_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vrcpps %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_round_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vroundpd $7, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone


define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_round_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vroundps $7, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone


define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_rsqrt_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vrsqrtps %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_sqrt_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtpd %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_sqrt_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtps %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone


define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
  ; FIXME: Unfortunately the execution domain fix pass changes this store to
  ; vmovups, and it's hard to force the integer domain with no 256-bit integer
  ; instructions available.
  ; The add operation is there to force the execution domain.
; CHECK-LABEL: test_x86_avx_storeu_dq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    vmovups %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind


define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
  ; The add operation is there to force the execution domain.
; CHECK-LABEL: test_x86_avx_storeu_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vmovupd %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind


define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_storeu_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmovups %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
  ret void
}
declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind


; 128-bit broadcast from memory into a ymm register; both element types expect
; the same vbroadcastf128 instruction.
define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
; CHECK-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vbroadcastf128 (%eax), %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly


define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
; CHECK-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vbroadcastf128 (%eax), %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly


; vperm2f128 with immediate 7. The expected lane selection is matched through
; the decoded shuffle comment printed after the instruction.
define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vperm2f128_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vperm2f128_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vperm2f128_si_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; CHECK-NEXT:    retl
  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone


; Immediate-controlled in-lane permutes; the element order is matched through
; the decoded shuffle comment.
define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone


define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone


define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0]
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4]
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone


; Variable in-lane permutes: the control vector is a register operand, or a
; memory operand in the _load variant.
define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone


define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone


define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1)
  ret <4 x float> %res
}
define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_ps_load:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vpermilps (%eax), %xmm0, %xmm0
; CHECK-NEXT:    retl
  %a2 = load <4 x i32>, <4 x i32>* %a1
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone


define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone


; vtestpd/vtestps. As with vptest, the variants differ in how the i32 result is
; built from the flags: sbbl/andl (vtestc), seta (vtestnzc), sete (vtestz).
; The 256-bit forms additionally expect a vzeroupper before the return.
define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vtestpd %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vtestpd %ymm1, %ymm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vtestps %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vtestps %ymm1, %ymm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vtestpd %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vtestpd %ymm1, %ymm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vtestps %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vtestps %ymm1, %ymm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone


define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vtestpd %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vtestpd %ymm1, %ymm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vtestps %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vtestps %ymm1, %ymm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone


; In both tests below the expected output contains a vzeroupper immediately
; before the return, in addition to the instruction produced by the intrinsic.
define void @test_x86_avx_vzeroall() {
; CHECK-LABEL: test_x86_avx_vzeroall:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vzeroall
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  call void @llvm.x86.avx.vzeroall()
  ret void
}
declare void @llvm.x86.avx.vzeroall() nounwind


define void @test_x86_avx_vzeroupper() {
; CHECK-LABEL: test_x86_avx_vzeroupper:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  call void @llvm.x86.avx.vzeroupper()
  ret void
}
declare void @llvm.x86.avx.vzeroupper() nounwind

; Make sure that instructions which have no AVX equivalents, but are associated
; with SSEX feature flags, still work.

define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
; CHECK-LABEL: monitor:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    leal (%eax), %eax
; CHECK-NEXT:    monitor
; CHECK-NEXT:    retl
  tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
  ret void
}
declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind

define void @mwait(i32 %E, i32 %H) nounwind {
; CHECK-LABEL: mwait:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    mwait
; CHECK-NEXT:    retl
  tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
  ret void
}
declare void @llvm.x86.sse3.mwait(i32, i32) nounwind

define void @sfence() nounwind {
; CHECK-LABEL: sfence:
; CHECK:       ## BB#0:
; CHECK-NEXT:    sfence
; CHECK-NEXT:    retl
  tail call void @llvm.x86.sse.sfence()
  ret void
}
declare void @llvm.x86.sse.sfence() nounwind

define void @lfence() nounwind {
; CHECK-LABEL: lfence:
; CHECK:       ## BB#0:
; CHECK-NEXT:    lfence
; CHECK-NEXT:    retl
  tail call void @llvm.x86.sse2.lfence()
  ret void
}
declare void @llvm.x86.sse2.lfence() nounwind

define void @mfence() nounwind {
; CHECK-LABEL: mfence:
; CHECK:       ## BB#0:
; CHECK-NEXT:    mfence
; CHECK-NEXT:    retl
  tail call void @llvm.x86.sse2.mfence()
  ret void
}
declare void @llvm.x86.sse2.mfence() nounwind

define void @clflush(i8* %p) nounwind {
; CHECK-LABEL: clflush:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    clflush (%eax)
; CHECK-NEXT:    retl
  tail call void @llvm.x86.sse2.clflush(i8* %p)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind

define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
; CHECK-LABEL: crc32_32_8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    crc32b {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    retl
  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind

define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: crc32_32_16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    crc32w {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    retl
  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind

define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: crc32_32_32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    crc32l {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    retl
  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind

define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
  ; The constant-pool label is matched with a regex rather than a literal
  ; LCPI<N>_<M> name, so that adding or removing functions earlier in this
  ; file does not break the test.
; CHECK-LABEL: movnt_dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; CHECK-NEXT:    vmovntdq %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %a2 = add <2 x i64> %a1, <i64 1, i64 1>
  %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind

define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
; CHECK-LABEL: movnt_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmovntps %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind

define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
  ; The add operation is there to force the execution domain.
; CHECK-LABEL: movnt_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vmovntpd %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
  tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind


; Check for pclmulqdq
define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_pclmulqdq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone