; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s

define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vaesdec
  %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vaesdeclast
  %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vaesenc
  %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vaesenclast
  %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
; CHECK: vaesimc
  %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
; CHECK: vaeskeygenassist
  %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone


define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vaddsd
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vcmpordpd
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone


define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vcmpordsd
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone


define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vcomisd
; CHECK: sete
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vcomisd
; CHECK: setae
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vcomisd
; CHECK: seta
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vcomisd
; CHECK: setbe
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vcomisd
; CHECK: sbbl %eax, %eax
; CHECK: andl $1, %eax
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vcomisd
; CHECK: setne
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; CHECK: vcvtdq2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; CHECK: vcvtdq2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
; CHECK: vcvtpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
; CHECK: vcvtpd2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
; CHECK: vcvtps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; CHECK: vcvtps2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone


define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; CHECK: vcvtsd2si
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; CHECK: vcvtsd2ss
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
; CHECK: movl
; CHECK: vcvtsi2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; CHECK: vcvtss2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; CHECK: vcvttpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
; CHECK: vcvttps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone


define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
; CHECK: vcvttsd2si
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vdivsd
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone


define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) {
; CHECK: movl
; CHECK: vmovups
  %res = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly


define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) {
; CHECK: movl
; CHECK: vmovups
  %res = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.loadu.pd(i8*) nounwind readonly


define void @test_x86_sse2_maskmov_dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
; CHECK: pushl
; CHECK: movl
; CHECK: vmaskmovdqu
; CHECK: popl
  call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2)
  ret void
}
declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind


define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vmaxpd
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vmaxsd
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vminpd
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vminsd
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK: vmovmskpd
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone


define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
; CHECK: movl
; CHECK: vmovntdq
  call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1)
  ret void
}
declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind


define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
; CHECK: movl
; CHECK: vmovntpd
  call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1)
  ret void
}
declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind


define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vmulsd
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpackssdw
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone


define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpacksswb
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpackuswb
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpaddsb
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpaddsw
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpaddusb
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpaddusw
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpavgb
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpavgw
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pcmpeq_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpeqb
  %res = call <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse2_pcmpeq_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpcmpeqd
  %res = call <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse2_pcmpeq_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpcmpeqw
  %res = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pcmpgt_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpgtb
  %res = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse2_pcmpgt_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpcmpgtd
  %res = call <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse2_pcmpgt_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpcmpgtw
  %res = call <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpmaddwd
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpmaxsw
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpmaxub
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpminsw
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpminub
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone


define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK: vpmovmskb
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpmulhw
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpmulhuw
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpmuludq
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpsadbw
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpslld
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK: vpslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK: vpslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsllq
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpsllw
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
; CHECK: vpslld
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
; CHECK: vpsllq
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
; CHECK: vpsllw
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpsrad
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpsraw
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
; CHECK: vpsrad
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
; CHECK: vpsraw
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpsrld
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK: vpsrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; CHECK: vpsrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpsrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; CHECK: vpsrld
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; CHECK: vpsrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; CHECK: vpsrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone


define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpsubsb
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpsubsw
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpsubusb
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpsubusw
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
; CHECK: vsqrtpd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
; CHECK: vsqrtsd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone


define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK: movl
; CHECK: vmovq
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind


define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
; CHECK: movl
; CHECK: vmovdqu
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind


define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
; CHECK: movl
; CHECK: vmovupd
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind


define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vsubsd
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vucomisd
; CHECK: sete
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vucomisd
; CHECK: setae
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vucomisd
; CHECK: seta
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vucomisd
; CHECK: setbe
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vucomisd
; CHECK: sbbl
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vucomisd
; CHECK: setne
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vaddsubpd
  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vaddsubps
  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vhaddpd
  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vhaddps
  %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vhsubpd
  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vhsubps
  %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone


define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
; CHECK: movl
; CHECK: vlddqu
  %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly


define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vblendpd
  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vblendps
  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone


define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK: vblendvpd
  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK: vblendvps
  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vdppd
  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vdpps
  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vinsertps
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone


define <2 x i64> @test_x86_sse41_movntdqa(i8* %a0) {
; CHECK: movl
; CHECK: vmovntdqa
  %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readonly


define <16 x i8> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vmpsadbw
  %res = call <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone


define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpackusdw
  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone


define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; CHECK: vpblendvb
  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpblendw
  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone


define <2 x i64> @test_x86_sse41_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpcmpeqq
  %res = call <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
; CHECK: vphminposuw
  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpmaxsb
  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpmaxsd
  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpmaxud
  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpmaxuw
  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpminsb
  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpminsd
  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpminud
  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpminuw
  %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
; CHECK: vpmovsxbd
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
; CHECK: vpmovsxbq
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
; CHECK: vpmovsxbw
  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
; CHECK: vpmovsxdq
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
; CHECK: vpmovsxwd
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
; CHECK: vpmovsxwq
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; CHECK: vpmovzxbd
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
; CHECK: vpmovzxbq
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; CHECK: vpmovzxbw
  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; CHECK: vpmovzxdq
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; CHECK: vpmovzxwd
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; CHECK: vpmovzxwq
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpmuldq
  %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone


define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vptest
; CHECK: sbbl
  %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vptest
; CHECK: seta
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vptest
; CHECK: sete
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
; CHECK: vroundpd
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
; CHECK: vroundps
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone


define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vroundsd
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vroundss
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone


define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK: movl
; CHECK: movl
; CHECK: vpcmpestri
; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK: movl
; CHECK: movl
; CHECK: vpcmpestri
; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK: movl
; CHECK: movl
; CHECK: vpcmpestri
; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK: movl
; CHECK: movl
; CHECK: vpcmpestri
; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK: movl
; CHECK: movl
; CHECK: vpcmpestri
; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK: movl
; CHECK: movl
; CHECK: vpcmpestri
; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK: movl
; CHECK: movl
; CHECK: vpcmpestrm
  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define <2 x i64> @test_x86_sse42_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpcmpgtq
  %res = call <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64>, <2 x i64>) nounwind readnone


define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpistri
; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpistri
; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpistri
; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpistri
; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpistri
; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpistri
; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpistrm
  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vaddss
  %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vcmpordps
  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone


define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vcmpordss
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone


define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vcomiss
; CHECK: sete
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vcomiss
; CHECK: setae
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vcomiss
; CHECK: seta
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vcomiss
; CHECK: setbe
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vcomiss
; CHECK: sbb
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vcomiss
; CHECK: setne
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
; CHECK: movl
; CHECK: vcvtsi2ss
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone


define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
; CHECK: vcvtss2si
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone


define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
; CHECK: vcvttss2si
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vdivss
  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone


define void @test_x86_sse_ldmxcsr(i8* %a0) {
; CHECK: movl
; CHECK: vldmxcsr
  call void @llvm.x86.sse.ldmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind


define <4 x float> @test_x86_sse_loadu_ps(i8* %a0) {
; CHECK: movl
; CHECK: vmovups
  %res = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly


define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vmaxps
  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vmaxss
  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vminps
  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vminss
  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK: vmovmskps
  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone


define void @test_x86_sse_movnt_ps(i8* %a0, <4 x float> %a1) {
; CHECK: movl
; CHECK: vmovntps
  call void @llvm.x86.sse.movnt.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind


define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vmulss
  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
; CHECK: vrcpps
  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
; CHECK: vrcpss
  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
; CHECK: vrsqrtps
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
; CHECK: vrsqrtss
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
; CHECK: vsqrtps
  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
; CHECK: vsqrtss
  %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone


define void @test_x86_sse_stmxcsr(i8* %a0) {
; CHECK: movl
; CHECK: vstmxcsr
  call void @llvm.x86.sse.stmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind


define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
; CHECK: movl
; CHECK: vmovups
  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind


define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vsubss
%res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1650 ret <4 x float> %res 1651 } 1652 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone 1653 1654 1655 define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) { 1656 ; CHECK: vucomiss 1657 ; CHECK: sete 1658 ; CHECK: movzbl 1659 %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1660 ret i32 %res 1661 } 1662 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone 1663 1664 1665 define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) { 1666 ; CHECK: vucomiss 1667 ; CHECK: setae 1668 ; CHECK: movzbl 1669 %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1670 ret i32 %res 1671 } 1672 declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone 1673 1674 1675 define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) { 1676 ; CHECK: vucomiss 1677 ; CHECK: seta 1678 ; CHECK: movzbl 1679 %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1680 ret i32 %res 1681 } 1682 declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone 1683 1684 1685 define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) { 1686 ; CHECK: vucomiss 1687 ; CHECK: setbe 1688 ; CHECK: movzbl 1689 %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1690 ret i32 %res 1691 } 1692 declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone 1693 1694 1695 define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) { 1696 ; CHECK: vucomiss 1697 ; CHECK: sbbl 1698 %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1699 ret i32 %res 1700 } 1701 declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone 1702 1703 1704 define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) { 1705 ; CHECK: vucomiss 1706 ; CHECK: setne 1707 ; CHECK: movzbl 1708 %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1709 ret i32 %res 1710 } 1711 declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone 1712 1713 1714 define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) { 1715 ; CHECK: vpabsb 1716 %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1] 1717 ret <16 x i8> %res 1718 } 1719 declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone 1720 1721 1722 define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) { 1723 ; CHECK: vpabsd 1724 %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1] 1725 ret <4 x i32> %res 1726 } 1727 declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone 1728 1729 1730 define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) { 1731 ; CHECK: vpabsw 1732 %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] 1733 ret <8 x i16> %res 1734 } 1735 declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone 1736 1737 1738 define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) { 1739 ; CHECK: vphaddd 1740 %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1741 ret <4 x i32> %res 1742 } 1743 declare <4 x i32> 
@llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone 1744 1745 1746 define <4 x i32> @test_x86_ssse3_phadd_sw_128(<4 x i32> %a0, <4 x i32> %a1) { 1747 ; CHECK: vphaddsw 1748 %res = call <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1749 ret <4 x i32> %res 1750 } 1751 declare <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32>, <4 x i32>) nounwind readnone 1752 1753 1754 define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) { 1755 ; CHECK: vphaddw 1756 %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1757 ret <8 x i16> %res 1758 } 1759 declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone 1760 1761 1762 define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) { 1763 ; CHECK: vphsubd 1764 %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1765 ret <4 x i32> %res 1766 } 1767 declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone 1768 1769 1770 define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) { 1771 ; CHECK: vphsubsw 1772 %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1773 ret <8 x i16> %res 1774 } 1775 declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone 1776 1777 1778 define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) { 1779 ; CHECK: vphsubw 1780 %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1781 ret <8 x i16> %res 1782 } 1783 declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone 1784 1785 1786 define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<8 x i16> %a0, <8 x i16> %a1) { 1787 ; CHECK: vpmaddubsw 1788 %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1789 ret <8 x i16> %res 1790 } 1791 declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone 1792 1793 1794 define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) { 1795 ; CHECK: vpmulhrsw 1796 %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1797 ret <8 x i16> %res 1798 } 1799 declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone 1800 1801 1802 define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) { 1803 ; CHECK: vpshufb 1804 %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 1805 ret <16 x i8> %res 1806 } 1807 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone 1808 1809 1810 define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) { 1811 ; CHECK: vpsignb 1812 %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 1813 ret <16 x i8> %res 1814 } 1815 declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone 1816 1817 1818 define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) { 1819 ; CHECK: vpsignd 1820 %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1821 ret <4 x i32> %res 1822 } 1823 declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone 
1824 1825 1826 define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) { 1827 ; CHECK: vpsignw 1828 %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1829 ret <8 x i16> %res 1830 } 1831 declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone 1832 1833 1834 define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) { 1835 ; CHECK: vaddsubpd 1836 %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 1837 ret <4 x double> %res 1838 } 1839 declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone 1840 1841 1842 define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) { 1843 ; CHECK: vaddsubps 1844 %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 1845 ret <8 x float> %res 1846 } 1847 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone 1848 1849 1850 define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) { 1851 ; CHECK: vblendpd 1852 %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1] 1853 ret <4 x double> %res 1854 } 1855 declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone 1856 1857 1858 define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) { 1859 ; CHECK: vblendps 1860 %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1] 1861 ret <8 x float> %res 1862 } 1863 declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone 1864 1865 1866 define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 1867 ; CHECK: vblendvpd 1868 %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1] 1869 ret <4 x double> %res 1870 } 1871 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone 1872 1873 1874 define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { 1875 ; CHECK: vblendvps 1876 %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1] 1877 ret <8 x float> %res 1878 } 1879 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone 1880 1881 1882 define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) { 1883 ; CHECK: vcmpordpd 1884 %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] 1885 ret <4 x double> %res 1886 } 1887 declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 1888 1889 1890 define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) { 1891 ; CHECK: vcmpordps 1892 %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] 1893 ret <8 x float> %res 1894 } 1895 declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 1896 1897 1898 define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) { 1899 ; CHECK: 
vcvtpd2psy 1900 %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1] 1901 ret <4 x float> %res 1902 } 1903 declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone 1904 1905 1906 define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) { 1907 ; CHECK: vcvtpd2dqy 1908 %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] 1909 ret <4 x i32> %res 1910 } 1911 declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone 1912 1913 1914 define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) { 1915 ; CHECK: vcvtps2pd 1916 %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1] 1917 ret <4 x double> %res 1918 } 1919 declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone 1920 1921 1922 define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) { 1923 ; CHECK: vcvtps2dq 1924 %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] 1925 ret <8 x i32> %res 1926 } 1927 declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone 1928 1929 1930 define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) { 1931 ; CHECK: vcvtdq2pd 1932 %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1] 1933 ret <4 x double> %res 1934 } 1935 declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone 1936 1937 1938 define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) { 1939 ; CHECK: vcvtdq2ps 1940 %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1] 1941 ret <8 x float> %res 1942 } 1943 declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone 1944 1945 1946 define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) { 1947 ; CHECK: vcvttpd2dqy 1948 %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] 1949 ret <4 x i32> %res 1950 } 1951 declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone 1952 1953 1954 define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) { 1955 ; CHECK: vcvttps2dq 1956 %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] 1957 ret <8 x i32> %res 1958 } 1959 declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone 1960 1961 1962 define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) { 1963 ; CHECK: vdpps 1964 %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1] 1965 ret <8 x float> %res 1966 } 1967 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone 1968 1969 1970 define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) { 1971 ; CHECK: vhaddpd 1972 %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 1973 ret <4 x double> %res 1974 } 1975 declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone 1976 1977 1978 define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) { 1979 ; CHECK: vhaddps 1980 %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 1981 ret <8 x float> %res 1982 } 1983 declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind 
readnone 1984 1985 1986 define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) { 1987 ; CHECK: vhsubpd 1988 %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 1989 ret <4 x double> %res 1990 } 1991 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone 1992 1993 1994 define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) { 1995 ; CHECK: vhsubps 1996 %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 1997 ret <8 x float> %res 1998 } 1999 declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone 2000 2001 2002 define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) { 2003 ; CHECK: vlddqu 2004 %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] 2005 ret <32 x i8> %res 2006 } 2007 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly 2008 2009 2010 define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) { 2011 ; CHECK: vmovdqu 2012 %res = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] 2013 ret <32 x i8> %res 2014 } 2015 declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly 2016 2017 2018 define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) { 2019 ; CHECK: vmovupd 2020 %res = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] 2021 ret <4 x double> %res 2022 } 2023 declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly 2024 2025 2026 define <8 x float> @test_x86_avx_loadu_ps_256(i8* %a0) { 2027 ; CHECK: vmovups 2028 %res = call <8 x float> @llvm.x86.avx.loadu.ps.256(i8* %a0) ; <<8 x float>> [#uses=1] 2029 ret <8 x float> %res 2030 } 2031 declare <8 x float> @llvm.x86.avx.loadu.ps.256(i8*) nounwind readonly 2032 2033 2034 define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) { 2035 ; CHECK: vmaskmovpd 2036 %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 2037 ret <2 x double> %res 2038 } 2039 declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly 2040 2041 2042 define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) { 2043 ; CHECK: vmaskmovpd 2044 %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 2045 ret <4 x double> %res 2046 } 2047 declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly 2048 2049 2050 define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) { 2051 ; CHECK: vmaskmovps 2052 %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 2053 ret <4 x float> %res 2054 } 2055 declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly 2056 2057 2058 define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) { 2059 ; CHECK: vmaskmovps 2060 %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 2061 ret <8 x float> %res 2062 } 2063 declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly 2064 2065 2066 define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) { 2067 ; CHECK: vmaskmovpd 2068 call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) 2069 ret void 2070 } 2071 declare void 
@llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind 2072 2073 2074 define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) { 2075 ; CHECK: vmaskmovpd 2076 call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2) 2077 ret void 2078 } 2079 declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind 2080 2081 2082 define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) { 2083 ; CHECK: vmaskmovps 2084 call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) 2085 ret void 2086 } 2087 declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind 2088 2089 2090 define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) { 2091 ; CHECK: vmaskmovps 2092 call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2) 2093 ret void 2094 } 2095 declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind 2096 2097 2098 define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) { 2099 ; CHECK: vmaxpd 2100 %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 2101 ret <4 x double> %res 2102 } 2103 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone 2104 2105 2106 define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) { 2107 ; CHECK: vmaxps 2108 %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 2109 ret <8 x float> %res 2110 } 2111 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone 2112 2113 2114 define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) { 2115 ; CHECK: vminpd 2116 %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 2117 ret <4 x double> %res 2118 } 2119 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone 2120 2121 2122 define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) { 2123 ; CHECK: vminps 2124 %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 2125 ret <8 x float> %res 2126 } 2127 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone 2128 2129 2130 define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) { 2131 ; CHECK: vmovmskpd 2132 %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1] 2133 ret i32 %res 2134 } 2135 declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone 2136 2137 2138 define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) { 2139 ; CHECK: vmovmskps 2140 %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1] 2141 ret i32 %res 2142 } 2143 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone 2144 2145 2146 define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) { 2147 ; CHECK: vmovntdq 2148 call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a1) 2149 ret void 2150 } 2151 declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind 2152 2153 2154 define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) { 2155 ; CHECK: vmovntpd 2156 call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a1) 2157 ret void 2158 } 2159 declare void 
@llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind 2160 2161 2162 define void @test_x86_avx_movnt_ps_256(i8* %a0, <8 x float> %a1) { 2163 ; CHECK: vmovntps 2164 call void @llvm.x86.avx.movnt.ps.256(i8* %a0, <8 x float> %a1) 2165 ret void 2166 } 2167 declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind 2168 2169 2170 define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) { 2171 ; CHECK: vptest 2172 ; CHECK: sbbl 2173 %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] 2174 ret i32 %res 2175 } 2176 declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone 2177 2178 2179 define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) { 2180 ; CHECK: vptest 2181 ; CHECK: seta 2182 ; CHECK: movzbl 2183 %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] 2184 ret i32 %res 2185 } 2186 declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone 2187 2188 2189 define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) { 2190 ; CHECK: vptest 2191 ; CHECK: sete 2192 ; CHECK: movzbl 2193 %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] 2194 ret i32 %res 2195 } 2196 declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone 2197 2198 2199 define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) { 2200 ; CHECK: vrcpps 2201 %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] 2202 ret <8 x float> %res 2203 } 2204 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone 2205 2206 2207 define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) { 2208 ; CHECK: vroundpd 2209 %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1] 2210 ret <4 x double> %res 2211 } 2212 declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone 2213 2214 2215 define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) { 2216 ; CHECK: vroundps 2217 %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1] 2218 ret <8 x float> %res 2219 } 2220 declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone 2221 2222 2223 define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) { 2224 ; CHECK: vrsqrtps 2225 %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] 2226 ret <8 x float> %res 2227 } 2228 declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone 2229 2230 2231 define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) { 2232 ; CHECK: vsqrtpd 2233 %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1] 2234 ret <4 x double> %res 2235 } 2236 declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone 2237 2238 2239 define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) { 2240 ; CHECK: vsqrtps 2241 %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] 2242 ret <8 x float> %res 2243 } 2244 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone 2245 2246 2247 define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { 2248 ; CHECK: vmovdqu 2249 call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1) 2250 ret void 2251 } 2252 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind 2253 2254 2255 
define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) { 2256 ; CHECK: vmovupd 2257 call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a1) 2258 ret void 2259 } 2260 declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind 2261 2262 2263 define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) { 2264 ; CHECK: vmovups 2265 call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1) 2266 ret void 2267 } 2268 declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind 2269 2270 2271 define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) { 2272 ; CHECK: vbroadcastsd 2273 %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1] 2274 ret <4 x double> %res 2275 } 2276 declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly 2277 2278 2279 define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) { 2280 ; CHECK: vbroadcastf128 2281 %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] 2282 ret <4 x double> %res 2283 } 2284 declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly 2285 2286 2287 define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) { 2288 ; CHECK: vbroadcastf128 2289 %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1] 2290 ret <8 x float> %res 2291 } 2292 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly 2293 2294 2295 define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) { 2296 ; CHECK: vbroadcastss 2297 %res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1] 2298 ret <4 x float> %res 2299 } 2300 declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly 2301 2302 2303 define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) { 2304 ; CHECK: vbroadcastss 2305 %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1] 2306 ret <8 x float> %res 2307 } 2308 declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly 2309 2310 2311 define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) { 2312 ; CHECK: vextractf128 2313 %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1] 2314 ret <2 x double> %res 2315 } 2316 declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone 2317 2318 2319 define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) { 2320 ; CHECK: vextractf128 2321 %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] 2322 ret <4 x float> %res 2323 } 2324 declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone 2325 2326 2327 define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) { 2328 ; CHECK: vextractf128 2329 %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7) ; <<4 x i32>> [#uses=1] 2330 ret <4 x i32> %res 2331 } 2332 declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone 2333 2334 2335 define <4 x double> @test_x86_avx_vinsertf128_pd_256(<4 x double> %a0, <2 x double> %a1) { 2336 ; CHECK: vinsertf128 2337 %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] 2338 ret <4 x double> %res 2339 } 2340 declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) 
nounwind readnone 2341 2342 2343 define <8 x float> @test_x86_avx_vinsertf128_ps_256(<8 x float> %a0, <4 x float> %a1) { 2344 ; CHECK: vinsertf128 2345 %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] 2346 ret <8 x float> %res 2347 } 2348 declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone 2349 2350 2351 define <8 x i32> @test_x86_avx_vinsertf128_si_256(<8 x i32> %a0, <4 x i32> %a1) { 2352 ; CHECK: vinsertf128 2353 %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1] 2354 ret <8 x i32> %res 2355 } 2356 declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone 2357 2358 2359 define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) { 2360 ; CHECK: vperm2f128 2361 %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] 2362 ret <4 x double> %res 2363 } 2364 declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 2365 2366 2367 define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) { 2368 ; CHECK: vperm2f128 2369 %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] 2370 ret <8 x float> %res 2371 } 2372 declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 2373 2374 2375 define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) { 2376 ; CHECK: vperm2f128 2377 %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1] 2378 ret <8 x i32> %res 2379 } 2380 declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone 2381 2382 2383 define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) { 2384 ; CHECK: vpermilpd 2385 %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7) ; <<2 x double>> [#uses=1] 2386 ret <2 x double> %res 2387 } 2388 declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone 2389 2390 2391 define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) { 2392 ; CHECK: vpermilpd 2393 %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1] 2394 ret <4 x double> %res 2395 } 2396 declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone 2397 2398 2399 define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) { 2400 ; CHECK: vpermilps 2401 %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] 2402 ret <4 x float> %res 2403 } 2404 declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone 2405 2406 2407 define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) { 2408 ; CHECK: vpermilps 2409 %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1] 2410 ret <8 x float> %res 2411 } 2412 declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone 2413 2414 2415 define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) { 2416 ; CHECK: vpermilpd 2417 %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1] 2418 ret <2 x double> %res 
2419 } 2420 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone 2421 2422 2423 define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) { 2424 ; CHECK: vpermilpd 2425 %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1] 2426 ret <4 x double> %res 2427 } 2428 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone 2429 2430 2431 define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) { 2432 ; CHECK: vpermilps 2433 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1] 2434 ret <4 x float> %res 2435 } 2436 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone 2437 2438 2439 define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) { 2440 ; CHECK: vpermilps 2441 %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1] 2442 ret <8 x float> %res 2443 } 2444 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone 2445 2446 2447 define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) { 2448 ; CHECK: vtestpd 2449 ; CHECK: sbbl 2450 %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 2451 ret i32 %res 2452 } 2453 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone 2454 2455 2456 define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) { 2457 ; CHECK: vtestpd 2458 ; CHECK: sbbl 2459 %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] 2460 ret i32 %res 2461 } 2462 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone 2463 2464 2465 define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) { 2466 ; CHECK: vtestps 2467 ; CHECK: sbbl 2468 %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 2469 ret i32 %res 2470 } 2471 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone 2472 2473 2474 define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) { 2475 ; CHECK: vtestps 2476 ; CHECK: sbbl 2477 %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] 2478 ret i32 %res 2479 } 2480 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone 2481 2482 2483 define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) { 2484 ; CHECK: vtestpd 2485 ; CHECK: seta 2486 ; CHECK: movzbl 2487 %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 2488 ret i32 %res 2489 } 2490 declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone 2491 2492 2493 define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) { 2494 ; CHECK: vtestpd 2495 ; CHECK: seta 2496 ; CHECK: movzbl 2497 %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] 2498 ret i32 %res 2499 } 2500 declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone 2501 2502 2503 define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) { 2504 ; CHECK: vtestps 2505 ; CHECK: seta 2506 ; CHECK: movzbl 2507 %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> 
%a0, <4 x float> %a1) ; <i32> [#uses=1] 2508 ret i32 %res 2509 } 2510 declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone 2511 2512 2513 define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) { 2514 ; CHECK: vtestps 2515 ; CHECK: seta 2516 ; CHECK: movzbl 2517 %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] 2518 ret i32 %res 2519 } 2520 declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone 2521 2522 2523 define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) { 2524 ; CHECK: vtestpd 2525 ; CHECK: sete 2526 ; CHECK: movzbl 2527 %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 2528 ret i32 %res 2529 } 2530 declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone 2531 2532 2533 define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) { 2534 ; CHECK: vtestpd 2535 ; CHECK: sete 2536 ; CHECK: movzbl 2537 %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] 2538 ret i32 %res 2539 } 2540 declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone 2541 2542 2543 define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) { 2544 ; CHECK: vtestps 2545 ; CHECK: sete 2546 ; CHECK: movzbl 2547 %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 2548 ret i32 %res 2549 } 2550 declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone 2551 2552 2553 define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) { 2554 ; CHECK: vtestps 2555 ; CHECK: sete 2556 ; CHECK: movzbl 2557 %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] 2558 ret i32 %res 2559 } 2560 declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone 2561 2562 2563 define void @test_x86_avx_vzeroall() { 2564 ; CHECK: vzeroall 2565 call void @llvm.x86.avx.vzeroall() 2566 ret void 2567 } 2568 declare void @llvm.x86.avx.vzeroall() nounwind 2569 2570 2571 define void @test_x86_avx_vzeroupper() { 2572 ; CHECK: vzeroupper 2573 call void @llvm.x86.avx.vzeroupper() 2574 ret void 2575 } 2576 declare void @llvm.x86.avx.vzeroupper() nounwind 2577 2578 2579